Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -20,21 +20,23 @@ SHELL=/bin/bash PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm \ - server.scm configf.scm db.scm keys.scm margs.scm \ - process.scm runs.scm tasks.scm tests.scm genexample.scm \ - http-transport.scm tdb.scm client.scm mt.scm \ - ezsteps.scm lock-queue.scm rmt.scm api.scm \ - subrun.scm portlogger.scm archive.scm env.scm \ - diff-report.scm cgisetup/models/pgdb.scm + configf.scm db.scm keys.scm margs.scm process.scm runs.scm \ + tasks.scm tests.scm genexample.scm tdb.scm mt.scm \ + ezsteps.scm lock-queue.scm api.scm subrun.scm \ + portlogger.scm archive.scm env.scm diff-report.scm \ + cgisetup/models/pgdb.scm + +# server.scm http-transport.scm client.scm rmt.scm # module source files -MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm +MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \ + servermod.scm clientmod.scm rmtmod.scm -all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt +all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut # dbmod.import.o is just a hack here mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o db.o : dbmod.import.o mofiles/debugprint.o : mofiles/mtargs.o @@ -258,10 +260,22 @@ $(PREFIX)/bin/mtexec : $(PREFIX)/bin/.$(ARCHSTR)/mtexec utils/mk_wrapper utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec chmod a+x $(PREFIX)/bin/mtexec +# mtserv + +mtserv: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtserv.scm + csc $(CSCOPTS) $(OFILES) $(MOFILES) mtserv.scm -o mtserv + +$(PREFIX)/bin/.$(ARCHSTR)/mtserv : mtserv + $(INSTALL) mtserv $(PREFIX)/bin/.$(ARCHSTR)/mtserv + +$(PREFIX)/bin/mtserv : $(PREFIX)/bin/.$(ARCHSTR)/mtserv utils/mk_wrapper + utils/mk_wrapper $(PREFIX) mtserv $(PREFIX)/bin/mtserv + chmod a+x $(PREFIX)/bin/mtserv + # tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt $(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -21,18 +21,19 @@ ;;====================================================================== (use srfi-69 posix) (declare (unit api)) -(declare (uses rmt)) +(declare (uses rmtmod)) (declare (uses db)) (declare (uses dbmod)) (declare (uses dbfile)) (declare (uses tasks)) (import dbmod) (import dbfile) +(import rmtmod) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -28,135 +28,136 @@ (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. -(module client -* - -) - -(import client) - -(include "common_records.scm") -(include "db_records.scm") - -;; client:get-signature -(define (client:get-signature) - (if *my-client-signature* *my-client-signature* - (let ((sig (conc (get-host-name) " " (current-process-id)))) - (set! *my-client-signature* sig) - *my-client-signature*))) - -;; Not currently used! But, I think it *should* be used!!! -#;(define (client:logout serverdat) - (let ((ok (and (socket? serverdat) - (cdb:logout serverdat *toppath* (client:get-signature))))) - ok)) - -;; Do all the connection work, look up the transport type and set up the -;; connection if required. -;; -;; There are two scenarios. -;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline -;; 2. We are a run tests, list runs or other interactive process and we must figure out -;; *transport-type* and *runremote* from the monitor.db -;; -;; client:setup -;; -;; lookup_server, need to remove *runremote* stuff -;; - -;;(define (http-transport:server-dat-make-url runremote) -(define (client:get-url runremote) - (if (and (remote-iface runremote) - (remote-port runremote)) - (conc "http://" - (remote-iface runremote) - ":" - (remote-port runremote)) - #f)) - -(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) - (mutex-lock! *rmt-mutex*) - (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) - (mutex-unlock! *rmt-mutex*) - res)) - -(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0)) - (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) - (server:start-and-wait areapath) - (if (<= remaining-tries 0) - (begin - (debug:print-error 0 *default-log-port* "failed to start or connect to server") - (exit 1)) - ;; - ;; Alternatively here, we can get the list of candidate servers and work our way - ;; through them searching for a good one. - ;; - (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid -;; (runremote (or area-dat *runremote*))) - (if (not server-dat) ;; no server found - (begin - (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time - (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) - (match server-dat - ((host port start-time server-id pid) - (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if (not runremote) - (begin - ;; Here we are creating a runremote where there was none or it was clobbered with #f - ;; - (set! runremote (make-remote)) - (let* ((server-info (server:check-if-running areapath))) - (remote-server-info-set! runremote server-info) - (if server-info - (begin - (remote-server-url-set! runremote (server:record->url server-info)) - (remote-server-id-set! runremote (server:record->id server-info))))))) - ;; at this point we have a runremote - (if (and host port server-id) - (let* ((nada (client:connect host port server-id runremote)) - (ping-res (rmt:login-no-auto-client-setup runremote))) - (if ping-res - (if runremote - (begin - (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote)) - runremote) - (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) - (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 - (http-transport:close-connections runremote) - (thread-sleep! 1) - (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)) - ))) - (begin ;; no server registered - ;; (server:kind-run areapath) - (server:start-and-wait areapath) - (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) - (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))))) - (else - (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat))))))) - -;; -;; connect - stored in remote-condat -;; -;; (define (http-transport:client-connect iface port server-id runremote) -(define (client:connect iface port server-id runremote-in) - (let* ((runremote (or runremote-in - (make-runremote)))) - (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri))) - ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) - (remote-iface-set! runremote iface) - (remote-port-set! runremote port) - (remote-server-id-set! runremote server-id) - (remote-connect-time-set! runremote (current-seconds)) - (remote-last-access-set! runremote (current-seconds)) - (remote-api-url-set! runremote api-url) - (remote-api-uri-set! runremote api-uri) - (remote-api-req-set! runremote api-req) - runremote))) - +;; (module client +;; * +;; +;; ) +;; +;; (import client) +;; +;; (include "common_records.scm") +;; (include "db_records.scm") +;; +;; ;; client:get-signature +;; (define (client:get-signature) +;; (if *my-client-signature* *my-client-signature* +;; (let ((sig (conc (get-host-name) " " (current-process-id)))) +;; (set! *my-client-signature* sig) +;; *my-client-signature*))) +;; +;; ;; Not currently used! But, I think it *should* be used!!! +;; #;(define (client:logout serverdat) +;; (let ((ok (and (socket? serverdat) +;; (cdb:logout serverdat *toppath* (client:get-signature))))) +;; ok)) +;; +;; ;; Do all the connection work, look up the transport type and set up the +;; ;; connection if required. +;; ;; +;; ;; There are two scenarios. +;; ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline +;; ;; 2. We are a run tests, list runs or other interactive process and we must figure out +;; ;; *transport-type* and *runremote* from the monitor.db +;; ;; +;; ;; client:setup +;; ;; +;; ;; lookup_server, need to remove *runremote* stuff +;; ;; +;; +;; ;;(define (http-transport:server-dat-make-url runremote) +;; (define (client:get-url runremote) +;; (if (and (remote-iface runremote) +;; (remote-port runremote)) +;; (conc "http://" +;; (remote-iface runremote) +;; ":" +;; (remote-port runremote)) +;; #f)) +;; +;; (define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) +;; (mutex-lock! *rmt-mutex*) +;; (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) +;; (mutex-unlock! *rmt-mutex*) +;; res)) +;; +;; (define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0)) +;; (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) +;; (server:start-and-wait areapath) +;; (if (<= remaining-tries 0) +;; (begin +;; (debug:print-error 0 *default-log-port* "failed to start or connect to server") +;; (exit 1)) +;; ;; +;; ;; Alternatively here, we can get the list of candidate servers and work our way +;; ;; through them searching for a good one. +;; ;; +;; (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid +;; ;; (runremote (or area-dat *runremote*))) +;; (if (not server-dat) ;; no server found +;; (begin +;; (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time +;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) +;; (match server-dat +;; ((host port start-time server-id pid) +;; (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) +;; (if (not runremote) +;; (begin +;; ;; Here we are creating a runremote where there was none or it was clobbered with #f +;; ;; +;; (set! runremote (make-remote)) +;; (let* ((server-info (server:check-if-running areapath))) +;; (remote-server-info-set! runremote server-info) +;; (if server-info +;; (begin +;; (remote-server-url-set! runremote (server:record->url server-info)) +;; (remote-server-id-set! runremote (server:record->id server-info))))))) +;; ;; at this point we have a runremote +;; (if (and host port server-id) +;; (let* ((nada (client:connect host port server-id runremote)) +;; (ping-res (rmt:login-no-auto-client-setup runremote))) +;; (if ping-res +;; (if runremote +;; (begin +;; (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote)) +;; runremote) +;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) +;; (begin ;; login failed but have a server record, clean out the record and try again +;; (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 +;; (http-transport:close-connections runremote) +;; (thread-sleep! 1) +;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)) +;; ))) +;; (begin ;; no server registered +;; ;; (server:kind-run areapath) +;; (server:start-and-wait areapath) +;; (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) +;; (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. +;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))))) +;; (else +;; (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat))))))) +;; +;; ;; +;; ;; connect - stored in remote-condat +;; ;; +;; ;; (define (http-transport:client-connect iface port server-id runremote) +;; (define (client:connect iface port server-id runremote-in) +;; (let* ((runremote (or runremote-in +;; (make-runremote)))) +;; (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id) +;; (let* ((api-url (conc "http://" iface ":" port "/api")) +;; (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) +;; (api-req (make-request method: 'POST uri: api-uri))) +;; ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) +;; (remote-iface-set! runremote iface) +;; (remote-port-set! runremote port) +;; (remote-server-id-set! runremote server-id) +;; (remote-connect-time-set! runremote (current-seconds)) +;; (remote-last-access-set! runremote (current-seconds)) +;; (remote-api-url-set! runremote api-url) +;; (remote-api-uri-set! runremote api-uri) +;; (remote-api-req-set! runremote api-req) +;; runremote))) +;; +;; ADDED clientmod.scm Index: clientmod.scm ================================================================== --- /dev/null +++ clientmod.scm @@ -0,0 +1,33 @@ + +;; Copyright 2006-2012, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== +;; C L I E N T S +;;====================================================================== + +;;(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5 +;; message-digest matchable spiffy uri-common intarweb http-client +;; spiffy-request-vars uri-common intarweb directory-utils) + +(declare (unit clientmod)) + +(module clientmod +* + +) + Index: dashboard-context-menu.scm ================================================================== --- dashboard-context-menu.scm +++ dashboard-context-menu.scm @@ -34,11 +34,11 @@ (declare (unit dashboard-context-menu)) (declare (uses common)) (declare (uses db)) (declare (uses gutils)) -(declare (uses rmt)) +(declare (uses rmtmod)) (declare (uses ezsteps)) ;; (declare (uses sdb)) ;; (declare (uses filedb)) (declare (uses subrun)) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -33,11 +33,11 @@ (declare (unit dashboard-tests)) (declare (uses common)) (declare (uses db)) (declare (uses gutils)) -(declare (uses rmt)) +(declare (uses rmtmod)) (declare (uses ezsteps)) ;; (declare (uses sdb)) ;; (declare (uses filedb)) (declare (uses subrun)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -51,11 +51,11 @@ (declare (uses dbmod)) ;; (declare (uses debugprint)) (declare (uses dbfile)) (declare (uses keys)) (declare (uses ods)) -(declare (uses client)) +;; (declare (uses client)) (declare (uses mt)) (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") Index: diff-report.scm ================================================================== --- diff-report.scm +++ diff-report.scm @@ -16,11 +16,11 @@ ;; along with Megatest. If not, see . ;; (declare (unit diff-report)) (declare (uses common)) -(declare (uses rmt)) +(declare (uses rmtmod)) (include "common_records.scm") (use matchable) (use fmt) (use ducttape-lib) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -1,6 +1,5 @@ - ;; Copyright 2006-2012, Matthew Welland. ;; ;; This file is part of Megatest. ;; ;; Megatest is free software: you can redistribute it and/or modify @@ -14,680 +13,681 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . -(require-extension (srfi 18) extras tcp s11n) - - -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) - -(use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) - -;; Configurations for server -(tcp-buffer-size 2048) -(max-connections 2048) - +;; (require-extension (srfi 18) extras tcp s11n) +;; +;; +;; (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) +;; +;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) +;; +;; ;; Configurations for server +;; (tcp-buffer-size 2048) +;; (max-connections 2048) +;; (declare (unit http-transport)) - -(declare (uses common)) -(declare (uses db)) -(declare (uses tests)) -(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. -(declare (uses server)) -;; (declare (uses daemon)) -(declare (uses portlogger)) -(declare (uses rmt)) -(declare (uses dbfile)) -(declare (uses commonmod)) - -(include "common_records.scm") -(include "db_records.scm") -(include "js-path.scm") - -(import dbfile commonmod) - -(require-library stml) -(define (http-transport:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; S E R V E R -;; ====================================================================== - -;; Call this to start the actual server -;; - -(define *db:process-queue-mutex* (make-mutex)) - -(define (http-transport:run hostn) - ;; Configurations for server - (tcp-buffer-size 2048) - (max-connections 2048) - (debug:print 2 *default-log-port* "Attempting to start the server ...") - (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (start-port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.server-start"))) - (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) - ;; set some parameters for the server - (root-path (if link-tree-path - link-tree-path - (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! - (handle-directory spiffy-directory-listing) - (handle-exception (lambda (exn chain) - (signal (make-composite-condition - (make-property-condition - 'server - 'message "server error"))))) - - ;; http-transport:handle-directory) ;; simple-directory-handler) - ;; Setup the web server and a /ctrl interface - ;; - (vhost-map `(((* any) . ,(lambda (continue) - ;; open the db on the first call - ;; This is were we set up the database connections - (let* (($ (request-vars source: 'both)) - (dat ($ 'dat)) - (res #f)) - (cond - ((equal? (uri-path (request-uri (current-request))) - '(/ "api")) - (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc - headers: '((content-type text/plain))) - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*)) - ((equal? (uri-path (request-uri (current-request))) - '(/ "")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "json_api")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "runs")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ any)) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "hey")) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "jquery3.1.0.js")) - (send-response body: (http-transport:show-jquery) - headers: '((content-type application/javascript)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "test_log")) - (send-response body: (http-transport:html-test-log $) - headers: '((content-type text/HTML)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "dashboard")) - (send-response body: (http-transport:html-dboard $) - headers: '((content-type text/HTML)))) - (else (continue)))))))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) - (with-output-to-file start-file (lambda ()(print (current-process-id))))) - (http-transport:try-start-server ipaddrstr start-port))) - -;; This is recursively run by http-transport:run until sucessful -;; -(define (http-transport:try-start-server ipaddrstr portnum) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) - (if (not config-use-proxy) - (determine-proxy (constantly #f))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) - (handle-exceptions - exn - (begin - ;; (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 0.1) - - ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (if config-hostname ;; this is a hint to bind directly - (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-") - ;; ipaddrstr - ;; config-hostname)) - (start-server port: portnum)) - (portlogger:open-run-close portlogger:set-port portnum "released") - (debug:print 1 *default-log-port* "INFO: server has been stopped")))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;;====================================================================== -;; C L I E N T S -;;====================================================================== - -(define *http-mutex* (make-mutex)) - -;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here -;; I'm pretty sure it is defunct. - -;; This next block all imported en-mass from the api branch -(define *http-requests-in-progress* 0) -(define *http-connections-next-cleanup* (current-seconds)) - -(define (http-transport:get-time-to-cleanup) - (let ((res #f)) - (mutex-lock! *http-mutex*) - (set! res (> (current-seconds) *http-connections-next-cleanup*)) - (mutex-unlock! *http-mutex*) - res)) - -(define (http-transport:inc-requests-count) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) - ;; Use this opportunity to slow things down iff there are too many requests in flight - (if (> *http-requests-in-progress* 5) - (begin - (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") - (thread-sleep! 1))) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count proc) - (mutex-lock! *http-mutex*) - (proc) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count-and-close-all-connections) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds - (if (> *http-requests-in-progress* 0) - (if (> etime (current-seconds)) - (begin - (thread-sleep! 0.05) - (loop etime)) - (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) - (close-all-connections!))) - (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:inc-requests-and-prep-to-close-all-connections) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) - -;; Send "cmd" with json payload "params" to serverdat and receive result -;; -(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3)) - (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat) - (let* ((fullurl (remote-api-req runremote)) - (res (vector #f "uninitialized")) - (success #t) - (sparams (db:obj->string params transport: 'http)) - (server-id (remote-server-id runremote))) - (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) - - ;; set up the http-client here - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - ;; send the data and get the response - ;; extract the needed info from the http data and - ;; process and return it. - (let* ((send-recieve (lambda () - (mutex-lock! *http-mutex*) - ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) - ;; ((exn http client-error) e (print e))) - (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. - success - (db:string->obj - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (set! success #f) - (if (debug:debug-mode 1) - (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") - (begin - (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) - (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) - (debug:print 0 *default-log-port* " call-chain: " call-chain))) - ;; what if another thread is communicating ok? Can't happen due to mutex - (http-transport:close-connections runremote) - (mutex-unlock! *http-mutex*) - ;; (close-connection! fullurl) - (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key (or server-id "thekey")) - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) - transport: 'http) - 0)) ;; added this speculatively - ;; Shouldn't this be a call to the managed call-all-connections stuff above? - ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections? - (mutex-unlock! *http-mutex*) - )) - (time-out (lambda () - (thread-sleep! 45) - (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") - #f)) - (th1 (make-thread send-recieve "with-input-from-request")) - (th2 (make-thread time-out "time out"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - (vector-set! res 0 success) - (thread-terminate! th2) - (if (vector? res) - (if (vector-ref res 0) ;; this is the first flag or the second flag? - (let* ((res-dat (vector-ref res 1))) - (if (and (string? res-dat) (string-contains res-dat "server-id mismatch")) - (signal (make-composite-condition - (make-property-condition - 'servermismatch - 'message (vector-ref res 1)))) - res)) ;; this is the *inner* vector? seriously? why? - (if (debug:debug-mode 11) - (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it - (print-call-chain (current-error-port)) - (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 11 *default-log-port* " server call chain:") - (pp (vector-ref res 1) (current-error-port)) - (signal (vector-ref res 0))) - res)) - (signal (make-composite-condition - (make-property-condition - 'timeout - 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) - -;; careful closing of connections stored in *runremote* -;; -(define (http-transport:close-connections runremote) - (if (remote? runremote) - (let ((api-dat (remote-api-uri runremote))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (if (args:any-defined? "-server" "-execute" "-run") - (debug:print-info 0 *default-log-port* "Closing connections to "api-dat)) - (if api-dat (close-connection! api-dat)) - (remote-conndat-set! runremote #f) - #t)) - #f)) - -;; run http-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (http-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((servinfofile #f) - (sdat #f) - (no-sync-db (db:open-no-sync-db)) - (tmp-area (common:get-db-tmp-area)) - (started-file (conc tmp-area "/.server-started")) - (server-start-time (current-seconds)) - (server-info (let loop ((start-time (current-seconds)) - (changed #t) - (last-sdat "not this")) - (begin ;; let ((sdat #f)) - (thread-sleep! 0.01) - (debug:print-info 0 *default-log-port* "Waiting for server alive signature") - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if (and sdat - (not changed) - (> (- (current-seconds) start-time) 2)) - (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo")) - (ipaddr (car sdat)) - (port (cadr sdat)) - (servinf (conc servinfodir"/"ipaddr":"port))) - (set! servinfofile servinf) - (if (not (file-exists? servinfodir)) - (create-directory servinfodir #t)) - (with-output-to-file servinf - (lambda () - (let* ((serv-id (server:mk-signature))) - (set! *server-id* serv-id) - (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)) - (print "started: "(seconds->year-week/day-time (current-seconds)))))) - (set! *on-exit-procs* (cons - (lambda () - (delete-file* servinf)) - *on-exit-procs*)) - ;; put data about this server into a simple flat file host.port - (debug:print-info 0 *default-log-port* "Received server alive signature") - sdat) - (begin - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (if sdat - (let* ((ipaddr (car sdat)) - (port (cadr sdat)) - (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port))) - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (exit)) - (exit) - ) - (loop start-time - (equal? sdat last-sdat) - sdat))))))) - (iface (car server-info)) - (port (cadr server-info)) - (last-access 0) - (server-timeout (server:expiration-timeout)) - (server-going #f) - (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - - (handle-exceptions - exn - (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) - (with-output-to-file started-file (lambda ()(print (current-process-id))))) - - (let loop ((count 0) - (server-state 'available) - (bad-sync-count 0) - (start-time (current-milliseconds))) - - ;; Use this opportunity to sync the tmp db to megatest.db - (if (not server-going) ;; *dbstruct-dbs* - (begin - (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!! - (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. - (if (and no-sync-db - (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :) - (begin - (if (common:low-noise-print 120 "sync-all-print") - (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))) - (db:all-db-sync *dbstruct-dbs*) - ))) - - ;; when things go wrong we don't want to be doing the various queries too often - ;; so we strive to run this stuff only every four seconds or so. - (let* ((sync-time (- (current-milliseconds) start-time)) - (rem-time (quotient (- 4000 sync-time) 1000))) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time))) - - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) - - ;; Check that iface and port have not changed (can happen if server port collides) - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - - (if (not (equal? sdat (list iface port))) - (let ((new-iface (car sdat)) - (new-port (cadr sdat))) - (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") - (set! iface new-iface) - (set! port new-port) - (if (not *server-id*) - (set! *server-id* (server:mk-signature))) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) - (flush-output *default-log-port*))) - - ;; Transfer *db-last-access* to last-access to use in checking that we are still alive - (mutex-lock! *heartbeat-mutex*) - (set! last-access *db-last-access*) - (mutex-unlock! *heartbeat-mutex*) - - (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) - (begin - (if (not *server-id*) - (set! *server-id* (server:mk-signature))) - (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) - (flush-output *default-log-port*))) - (if (common:low-noise-print 60 "dbstats") - (begin - (debug:print 0 *default-log-port* "Server stats:") - (db:print-current-query-stats))) - (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) - (cond - ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (let ((curr-time (current-seconds))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn) - (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter - (not *server-overloaded*) - (file-exists? servinfofile)) - (change-file-times servinfofile curr-time curr-time))) - (if (and (common:low-noise-print 120 "start new server") - (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another - (begin - (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...") - (server:kind-run *toppath*) - (if (> *api-process-request-count* 100) - (begin - (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile) - (delete-file* servinfofile))))))) - (loop 0 server-state bad-sync-count (current-milliseconds))) - (else - (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (http-transport:server-shutdown port))))))) - -(define (http-transport:server-shutdown port) - (begin - ;;(BB> "http-transport:server-shutdown called") - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - ;; - ;; start_shutdown - ;; - (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up - (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 1) - - ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - ;; (debug:print-info 0 *default-log-port* "Average cached write time " - ;; (if (eq? *number-of-writes* 0) - ;; "n/a (no writes)" - ;; (/ *writes-total-delay* - ;; *number-of-writes*)) - ;; " ms") - ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - ;; (debug:print-info 0 *default-log-port* "Average non-cached time " - ;; (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - ;; " ms") - - (db:print-current-query-stats) - #;(common:save-pkt `((action . exit) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - - ;; remove .servinfo file(s) here - - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (exit))) - -;; all routes though here end in exit ... -;; -;; start_server? -;; -(define (http-transport:launch) - ;; check the .servinfo directory, are there other servers running on this - ;; or another host? - (let* ((server-start-is-ok (server:minimal-check *toppath*))) - (if (not server-start-is-ok) - (begin - (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.") - (exit 1)))) - - ;; check that a server start is in progress, pause or exit if so - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit))) - -;; (define (http-transport:server-signal-handler signum) -;; (signal-mask! signum) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; (thread-sleep! 1)) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) - -;;=============================================== -;; Java script -;;=============================================== -(define (http-transport:show-jquery) - (let* ((data (tests:readlines *java-script-lib*))) -(string-join data "\n"))) - - - -;;====================================================================== -;; web pages -;;====================================================================== - -(define (http-transport:html-test-log $) - (let* ((run-id ($ 'runid)) - (test-item ($ 'testname)) - (parts (string-split test-item ":")) - (test-name (car parts)) - - (item-name (if (equal? (length parts) 1) - "" - (cadr parts)))) - ;(print $) -(tests:get-test-log run-id test-name item-name))) - - -(define (http-transport:html-dboard $) - (let* ((page ($ 'page)) - (oup (open-output-string)) - (bdy "--------------------------") - - (ret (tests:dynamic-dboard page))) - (s:output-new oup ret) - (close-output-port oup) - - (set! bdy (get-output-string oup)) - (conc "

Dashboard

" bdy "

" ))) - -(define (http-transport:main-page) - (let ((linkpath (root-path))) - (conc "

" (pathname-strip-directory *toppath*) "

" - "" - "Run area: " *toppath* - "

Server Stats

" - (http-transport:stats-table) - "
" - (http-transport:runs linkpath) - "
" - ;; (http-transport:run-stats) - "" - ))) - -(define (http-transport:stats-table) - (mutex-lock! *heartbeat-mutex*) - (let ((res - (conc "" - ;; "" - "" - "" - "" - ;; "" - "" - "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) - "n/a (no writes)" - (/ *writes-total-delay* - *number-of-writes*)) - " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - " ms
Last access" (seconds->time-string *db-last-access*) "
"))) - (mutex-unlock! *heartbeat-mutex*) - res)) - -(define (http-transport:runs linkpath) - (conc "

Runs

" - (string-intersperse - (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) - (map (lambda (p) - (conc "" p "
")) - files)) - " "))) - -#;(define (http-transport:run-stats) - (let ((stats (open-run-close db:get-running-stats #f))) - (conc "" - (string-intersperse - (map (lambda (stat) - (conc "")) - stats) - " ") - "
" (car stat) "" (cadr stat) "
"))) +;; +;; (declare (uses common)) +;; (declare (uses db)) +;; (declare (uses tests)) +;; (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +;; (declare (uses server)) +;; ;; (declare (uses daemon)) +;; (declare (uses portlogger)) +;; (declare (uses rmt)) +;; (declare (uses dbfile)) +;; (declare (uses commonmod)) +;; +;; (include "common_records.scm") +;; (include "db_records.scm") +;; (include "js-path.scm") +;; +;; (import dbfile commonmod) +;; +;; (require-library stml) +;; (define (http-transport:make-server-url hostport) +;; (if (not hostport) +;; #f +;; (conc "http://" (car hostport) ":" (cadr hostport)))) +;; +;; (define *server-loop-heart-beat* (current-seconds)) +;; +;; ;;====================================================================== +;; ;; S E R V E R +;; ;; ====================================================================== +;; +;; ;; Call this to start the actual server +;; ;; +;; +;; (define *db:process-queue-mutex* (make-mutex)) +;; +;; (define (http-transport:run hostn) +;; ;; Configurations for server +;; (tcp-buffer-size 2048) +;; (max-connections 2048) +;; (debug:print 2 *default-log-port* "Attempting to start the server ...") +;; (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily +;; (hostname (get-host-name)) +;; (ipaddrstr (let ((ipstr (if (string=? "-" hostn) +;; ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") +;; (server:get-best-guess-address hostname) +;; #f))) +;; (if ipstr ipstr hostn))) ;; hostname))) +;; (start-port (portlogger:open-run-close portlogger:find-port)) +;; (link-tree-path (common:get-linktree)) +;; (tmp-area (common:get-db-tmp-area)) +;; (start-file (conc tmp-area "/.server-start"))) +;; (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) +;; ;; set some parameters for the server +;; (root-path (if link-tree-path +;; link-tree-path +;; (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! +;; (handle-directory spiffy-directory-listing) +;; (handle-exception (lambda (exn chain) +;; (signal (make-composite-condition +;; (make-property-condition +;; 'server +;; 'message "server error"))))) +;; +;; ;; http-transport:handle-directory) ;; simple-directory-handler) +;; ;; Setup the web server and a /ctrl interface +;; ;; +;; (vhost-map `(((* any) . ,(lambda (continue) +;; ;; open the db on the first call +;; ;; This is were we set up the database connections +;; (let* (($ (request-vars source: 'both)) +;; (dat ($ 'dat)) +;; (res #f)) +;; (cond +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "api")) +;; (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc +;; headers: '((content-type text/plain))) +;; (mutex-lock! *heartbeat-mutex*) +;; (set! *db-last-access* (current-seconds)) +;; (mutex-unlock! *heartbeat-mutex*)) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "")) +;; (send-response body: (http-transport:main-page))) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "json_api")) +;; (send-response body: (http-transport:main-page))) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "runs")) +;; (send-response body: (http-transport:main-page))) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ any)) +;; (send-response body: "hey there!\n" +;; headers: '((content-type text/plain)))) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "hey")) +;; (send-response body: "hey there!\n" +;; headers: '((content-type text/plain)))) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "jquery3.1.0.js")) +;; (send-response body: (http-transport:show-jquery) +;; headers: '((content-type application/javascript)))) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "test_log")) +;; (send-response body: (http-transport:html-test-log $) +;; headers: '((content-type text/HTML)))) +;; ((equal? (uri-path (request-uri (current-request))) +;; '(/ "dashboard")) +;; (send-response body: (http-transport:html-dboard $) +;; headers: '((content-type text/HTML)))) +;; (else (continue)))))))) +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) +;; (with-output-to-file start-file (lambda ()(print (current-process-id))))) +;; (http-transport:try-start-server ipaddrstr start-port))) +;; +;; ;; This is recursively run by http-transport:run until sucessful +;; ;; +;; (define (http-transport:try-start-server ipaddrstr portnum) +;; (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) +;; (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) +;; (if (not config-use-proxy) +;; (determine-proxy (constantly #f))) +;; (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) +;; (handle-exceptions +;; exn +;; (begin +;; ;; (print-error-message exn) +;; (if (< portnum 64000) +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") +;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) +;; (debug:print 5 *default-log-port* "exn=" (condition->list exn)) +;; (portlogger:open-run-close portlogger:set-failed portnum) +;; (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") +;; (thread-sleep! 0.1) +;; +;; ;; get_next_port goes here +;; (http-transport:try-start-server ipaddrstr +;; (portlogger:open-run-close portlogger:find-port))) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server")))) +;; ;; any error in following steps will result in a retry +;; (set! *server-info* (list ipaddrstr portnum)) +;; (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) +;; ;; This starts the spiffy server +;; ;; NEED WAY TO SET IP TO #f TO BIND ALL +;; ;; (start-server bind-address: ipaddrstr port: portnum) +;; (if config-hostname ;; this is a hint to bind directly +;; (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-") +;; ;; ipaddrstr +;; ;; config-hostname)) +;; (start-server port: portnum)) +;; (portlogger:open-run-close portlogger:set-port portnum "released") +;; (debug:print 1 *default-log-port* "INFO: server has been stopped")))) +;; +;; ;;====================================================================== +;; ;; S E R V E R U T I L I T I E S +;; ;;====================================================================== +;; +;; ;;====================================================================== +;; ;; C L I E N T S +;; ;;====================================================================== +;; +;; (define *http-mutex* (make-mutex)) +;; +;; ;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here +;; ;; I'm pretty sure it is defunct. +;; +;; ;; This next block all imported en-mass from the api branch +;; (define *http-requests-in-progress* 0) +;; (define *http-connections-next-cleanup* (current-seconds)) +;; +;; (define (http-transport:get-time-to-cleanup) +;; (let ((res #f)) +;; (mutex-lock! *http-mutex*) +;; (set! res (> (current-seconds) *http-connections-next-cleanup*)) +;; (mutex-unlock! *http-mutex*) +;; res)) +;; +;; (define (http-transport:inc-requests-count) +;; (mutex-lock! *http-mutex*) +;; (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) +;; ;; Use this opportunity to slow things down iff there are too many requests in flight +;; (if (> *http-requests-in-progress* 5) +;; (begin +;; (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") +;; (thread-sleep! 1))) +;; (mutex-unlock! *http-mutex*)) +;; +;; (define (http-transport:dec-requests-count proc) +;; (mutex-lock! *http-mutex*) +;; (proc) +;; (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) +;; (mutex-unlock! *http-mutex*)) +;; +;; (define (http-transport:dec-requests-count-and-close-all-connections) +;; (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) +;; (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds +;; (if (> *http-requests-in-progress* 0) +;; (if (> etime (current-seconds)) +;; (begin +;; (thread-sleep! 0.05) +;; (loop etime)) +;; (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) +;; (close-all-connections!))) +;; (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) +;; (mutex-unlock! *http-mutex*)) +;; +;; (define (http-transport:inc-requests-and-prep-to-close-all-connections) +;; (mutex-lock! *http-mutex*) +;; (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) +;; +;; ;; Send "cmd" with json payload "params" to serverdat and receive result +;; ;; +;; (define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3)) +;; (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat) +;; (let* ((fullurl (remote-api-req runremote)) +;; (res (vector #f "uninitialized")) +;; (success #t) +;; (sparams (db:obj->string params transport: 'http)) +;; (server-id (remote-server-id runremote))) +;; (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) +;; +;; ;; set up the http-client here +;; (max-retry-attempts 1) +;; ;; consider all requests indempotent +;; (retry-request? (lambda (request) +;; #f)) +;; ;; send the data and get the response +;; ;; extract the needed info from the http data and +;; ;; process and return it. +;; (let* ((send-recieve (lambda () +;; (mutex-lock! *http-mutex*) +;; ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) +;; ;; ((exn http client-error) e (print e))) +;; (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. +;; success +;; (db:string->obj +;; (handle-exceptions +;; exn +;; (let ((call-chain (get-call-chain)) +;; (msg ((condition-property-accessor 'exn 'message) exn))) +;; (set! success #f) +;; (if (debug:debug-mode 1) +;; (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") +;; (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) +;; (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) +;; (debug:print 0 *default-log-port* " call-chain: " call-chain))) +;; ;; what if another thread is communicating ok? Can't happen due to mutex +;; (http-transport:close-connections runremote) +;; (mutex-unlock! *http-mutex*) +;; ;; (close-connection! fullurl) +;; (db:obj->string #f)) +;; (with-input-from-request ;; was dat +;; fullurl +;; (list (cons 'key (or server-id "thekey")) +;; (cons 'cmd cmd) +;; (cons 'params sparams)) +;; read-string)) +;; transport: 'http) +;; 0)) ;; added this speculatively +;; ;; Shouldn't this be a call to the managed call-all-connections stuff above? +;; ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections? +;; (mutex-unlock! *http-mutex*) +;; )) +;; (time-out (lambda () +;; (thread-sleep! 45) +;; (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") +;; #f)) +;; (th1 (make-thread send-recieve "with-input-from-request")) +;; (th2 (make-thread time-out "time out"))) +;; (thread-start! th1) +;; (thread-start! th2) +;; (thread-join! th1) +;; (vector-set! res 0 success) +;; (thread-terminate! th2) +;; (if (vector? res) +;; (if (vector-ref res 0) ;; this is the first flag or the second flag? +;; (let* ((res-dat (vector-ref res 1))) +;; (if (and (string? res-dat) (string-contains res-dat "server-id mismatch")) +;; (signal (make-composite-condition +;; (make-property-condition +;; 'servermismatch +;; 'message (vector-ref res 1)))) +;; res)) ;; this is the *inner* vector? seriously? why? +;; (if (debug:debug-mode 11) +;; (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it +;; (print-call-chain (current-error-port)) +;; (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) +;; (debug:print 11 *default-log-port* " server call chain:") +;; (pp (vector-ref res 1) (current-error-port)) +;; (signal (vector-ref res 0))) +;; res)) +;; (signal (make-composite-condition +;; (make-property-condition +;; 'timeout +;; 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) +;; +;; ;; careful closing of connections stored in *runremote* +;; ;; +;; (define (http-transport:close-connections runremote) +;; (if (remote? runremote) +;; (let ((api-dat (remote-api-uri runremote))) +;; (handle-exceptions +;; exn +;; (begin +;; (print-call-chain *default-log-port*) +;; (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) +;; (if (args:any-defined? "-server" "-execute" "-run") +;; (debug:print-info 0 *default-log-port* "Closing connections to "api-dat)) +;; (if api-dat (close-connection! api-dat)) +;; (remote-conndat-set! runremote #f) +;; #t)) +;; #f)) +;; +;; ;; run http-transport:keep-running in a parallel thread to monitor that the db is being +;; ;; used and to shutdown after sometime if it is not. +;; ;; +;; (define (http-transport:keep-running) +;; ;; if none running or if > 20 seconds since +;; ;; server last used then start shutdown +;; ;; This thread waits for the server to come alive +;; (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") +;; (let* ((servinfofile #f) +;; (sdat #f) +;; (no-sync-db (db:open-no-sync-db)) +;; (tmp-area (common:get-db-tmp-area)) +;; (started-file (conc tmp-area "/.server-started")) +;; (server-start-time (current-seconds)) +;; (server-info (let loop ((start-time (current-seconds)) +;; (changed #t) +;; (last-sdat "not this")) +;; (begin ;; let ((sdat #f)) +;; (thread-sleep! 0.01) +;; (debug:print-info 0 *default-log-port* "Waiting for server alive signature") +;; (mutex-lock! *heartbeat-mutex*) +;; (set! sdat *server-info*) +;; (mutex-unlock! *heartbeat-mutex*) +;; (if (and sdat +;; (not changed) +;; (> (- (current-seconds) start-time) 2)) +;; (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo")) +;; (ipaddr (car sdat)) +;; (port (cadr sdat)) +;; (servinf (conc servinfodir"/"ipaddr":"port))) +;; (set! servinfofile servinf) +;; (if (not (file-exists? servinfodir)) +;; (create-directory servinfodir #t)) +;; (with-output-to-file servinf +;; (lambda () +;; (let* ((serv-id (server:mk-signature))) +;; (set! *server-id* serv-id) +;; (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)) +;; (print "started: "(seconds->year-week/day-time (current-seconds)))))) +;; (set! *on-exit-procs* (cons +;; (lambda () +;; (delete-file* servinf)) +;; *on-exit-procs*)) +;; ;; put data about this server into a simple flat file host.port +;; (debug:print-info 0 *default-log-port* "Received server alive signature") +;; sdat) +;; (begin +;; (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) +;; (sleep 4) +;; (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes +;; (if sdat +;; (let* ((ipaddr (car sdat)) +;; (port (cadr sdat)) +;; (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port))) +;; (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") +;; (exit)) +;; (exit) +;; ) +;; (loop start-time +;; (equal? sdat last-sdat) +;; sdat))))))) +;; (iface (car server-info)) +;; (port (cadr server-info)) +;; (last-access 0) +;; (server-timeout (server:expiration-timeout)) +;; (server-going #f) +;; (server-log-file (args:get-arg "-log"))) ;; always set when we are a server +;; +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) +;; (with-output-to-file started-file (lambda ()(print (current-process-id))))) +;; +;; (let loop ((count 0) +;; (server-state 'available) +;; (bad-sync-count 0) +;; (start-time (current-milliseconds))) +;; +;; ;; Use this opportunity to sync the tmp db to megatest.db +;; (if (not server-going) ;; *dbstruct-dbs* +;; (begin +;; (debug:print 0 *default-log-port* "SERVER: dbprep") +;; (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!! +;; (set! server-going #t) +;; (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. +;; (if (and no-sync-db +;; (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :) +;; (begin +;; (if (common:low-noise-print 120 "sync-all-print") +;; (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))) +;; (db:all-db-sync *dbstruct-dbs*) +;; ))) +;; +;; ;; when things go wrong we don't want to be doing the various queries too often +;; ;; so we strive to run this stuff only every four seconds or so. +;; (let* ((sync-time (- (current-milliseconds) start-time)) +;; (rem-time (quotient (- 4000 sync-time) 1000))) +;; (if (and (<= rem-time 4) +;; (> rem-time 0)) +;; (thread-sleep! rem-time))) +;; +;; (if (< count 1) ;; 3x3 = 9 secs aprox +;; (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) +;; +;; ;; Check that iface and port have not changed (can happen if server port collides) +;; (mutex-lock! *heartbeat-mutex*) +;; (set! sdat *server-info*) +;; (mutex-unlock! *heartbeat-mutex*) +;; +;; (if (not (equal? sdat (list iface port))) +;; (let ((new-iface (car sdat)) +;; (new-port (cadr sdat))) +;; (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") +;; (set! iface new-iface) +;; (set! port new-port) +;; (if (not *server-id*) +;; (set! *server-id* (server:mk-signature))) +;; (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) +;; (flush-output *default-log-port*))) +;; +;; ;; Transfer *db-last-access* to last-access to use in checking that we are still alive +;; (mutex-lock! *heartbeat-mutex*) +;; (set! last-access *db-last-access*) +;; (mutex-unlock! *heartbeat-mutex*) +;; +;; (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) +;; (begin +;; (if (not *server-id*) +;; (set! *server-id* (server:mk-signature))) +;; (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) +;; (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) +;; (flush-output *default-log-port*))) +;; (if (common:low-noise-print 60 "dbstats") +;; (begin +;; (debug:print 0 *default-log-port* "Server stats:") +;; (db:print-current-query-stats))) +;; (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) +;; (cond +;; ((and *server-run* +;; (> (+ last-access server-timeout) +;; (current-seconds))) +;; (if (common:low-noise-print 120 "server continuing") +;; (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) +;; (let ((curr-time (current-seconds))) +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn) +;; (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter +;; (not *server-overloaded*) +;; (file-exists? servinfofile)) +;; (change-file-times servinfofile curr-time curr-time))) +;; (if (and (common:low-noise-print 120 "start new server") +;; (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another +;; (begin +;; (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...") +;; (server:kind-run *toppath*) +;; (if (> *api-process-request-count* 100) +;; (begin +;; (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile) +;; (delete-file* servinfofile))))))) +;; (loop 0 server-state bad-sync-count (current-milliseconds))) +;; (else +;; (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) +;; (http-transport:server-shutdown port))))))) +;; +;; (define (http-transport:server-shutdown port) +;; (begin +;; ;;(BB> "http-transport:server-shutdown called") +;; (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) +;; ;; +;; ;; start_shutdown +;; ;; +;; (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up +;; (portlogger:open-run-close portlogger:set-port port "released") +;; (thread-sleep! 1) +;; +;; ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) +;; ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) +;; ;; (debug:print-info 0 *default-log-port* "Average cached write time " +;; ;; (if (eq? *number-of-writes* 0) +;; ;; "n/a (no writes)" +;; ;; (/ *writes-total-delay* +;; ;; *number-of-writes*)) +;; ;; " ms") +;; ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) +;; ;; (debug:print-info 0 *default-log-port* "Average non-cached time " +;; ;; (if (eq? *number-non-write-queries* 0) +;; ;; "n/a (no queries)" +;; ;; (/ *total-non-write-delay* +;; ;; *number-non-write-queries*)) +;; ;; " ms") +;; +;; (db:print-current-query-stats) +;; #;(common:save-pkt `((action . exit) +;; (T . server) +;; (pid . ,(current-process-id))) +;; *configdat* #t) +;; +;; ;; remove .servinfo file(s) here +;; +;; (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") +;; (exit))) +;; +;; ;; all routes though here end in exit ... +;; ;; +;; ;; start_server? +;; ;; +;; (define (http-transport:launch) +;; ;; check the .servinfo directory, are there other servers running on this +;; ;; or another host? +;; (let* ((server-start-is-ok (server:minimal-check *toppath*))) +;; (if (not server-start-is-ok) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.") +;; (exit 1)))) +;; +;; ;; check that a server start is in progress, pause or exit if so +;; (let* ((th2 (make-thread (lambda () +;; (debug:print-info 0 *default-log-port* "Server run thread started") +;; (http-transport:run +;; (if (args:get-arg "-server") +;; (args:get-arg "-server") +;; "-") +;; )) "Server run")) +;; (th3 (make-thread (lambda () +;; (debug:print-info 0 *default-log-port* "Server monitor thread started") +;; (http-transport:keep-running) +;; "Keep running")))) +;; (thread-start! th2) +;; (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. +;; (thread-start! th3) +;; (set! *didsomething* #t) +;; (thread-join! th2) +;; (exit))) +;; +;; ;; (define (http-transport:server-signal-handler signum) +;; ;; (signal-mask! signum) +;; ;; (handle-exceptions +;; ;; exn +;; ;; (debug:print 0 *default-log-port* " ... exiting ...") +;; ;; (let ((th1 (make-thread (lambda () +;; ;; (thread-sleep! 1)) +;; ;; "eat response")) +;; ;; (th2 (make-thread (lambda () +;; ;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") +;; ;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff +;; ;; (debug:print 0 *default-log-port* " Done.") +;; ;; (exit 4)) +;; ;; "exit on ^C timer"))) +;; ;; (thread-start! th2) +;; ;; (thread-start! th1) +;; ;; (thread-join! th2)))) +;; +;; ;;=============================================== +;; ;; Java script +;; ;;=============================================== +;; (define (http-transport:show-jquery) +;; (let* ((data (tests:readlines *java-script-lib*))) +;; (string-join data "\n"))) +;; +;; +;; +;; ;;====================================================================== +;; ;; web pages +;; ;;====================================================================== +;; +;; (define (http-transport:html-test-log $) +;; (let* ((run-id ($ 'runid)) +;; (test-item ($ 'testname)) +;; (parts (string-split test-item ":")) +;; (test-name (car parts)) +;; +;; (item-name (if (equal? (length parts) 1) +;; "" +;; (cadr parts)))) +;; ;(print $) +;; (tests:get-test-log run-id test-name item-name))) +;; +;; +;; (define (http-transport:html-dboard $) +;; (let* ((page ($ 'page)) +;; (oup (open-output-string)) +;; (bdy "--------------------------") +;; +;; (ret (tests:dynamic-dboard page))) +;; (s:output-new oup ret) +;; (close-output-port oup) +;; +;; (set! bdy (get-output-string oup)) +;; (conc "

Dashboard

" bdy "

" ))) +;; +;; (define (http-transport:main-page) +;; (let ((linkpath (root-path))) +;; (conc "

" (pathname-strip-directory *toppath*) "

" +;; "" +;; "Run area: " *toppath* +;; "

Server Stats

" +;; (http-transport:stats-table) +;; "
" +;; (http-transport:runs linkpath) +;; "
" +;; ;; (http-transport:run-stats) +;; "" +;; ))) +;; +;; (define (http-transport:stats-table) +;; (mutex-lock! *heartbeat-mutex*) +;; (let ((res +;; (conc "" +;; ;; "" +;; "" +;; "" +;; "" +;; ;; "" +;; "" +;; "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) +;; "n/a (no writes)" +;; (/ *writes-total-delay* +;; *number-of-writes*)) +;; " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) +;; ;; "n/a (no queries)" +;; ;; (/ *total-non-write-delay* +;; ;; *number-non-write-queries*)) +;; " ms
Last access" (seconds->time-string *db-last-access*) "
"))) +;; (mutex-unlock! *heartbeat-mutex*) +;; res)) +;; +;; (define (http-transport:runs linkpath) +;; (conc "

Runs

" +;; (string-intersperse +;; (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) +;; (map (lambda (p) +;; (conc "" p "
")) +;; files)) +;; " "))) +;; +;; #;(define (http-transport:run-stats) +;; (let ((stats (open-run-close db:get-running-stats #f))) +;; (conc "" +;; (string-intersperse +;; (map (lambda (stat) +;; (conc "")) +;; stats) +;; " ") +;; "
" (car stat) "" (cadr stat) "
"))) +;; Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -25,12 +25,10 @@ (declare (uses common)) ;; (declare (uses megatest-version)) (declare (uses margs)) (declare (uses runs)) (declare (uses launch)) -(declare (uses server)) -(declare (uses client)) (declare (uses tests)) (declare (uses genexample)) ;; (declare (uses daemon)) (declare (uses db)) @@ -43,10 +41,13 @@ (declare (uses env)) (declare (uses diff-report)) (declare (uses db)) (declare (uses dbmod)) (declare (uses dbmod.import)) +(declare (uses rmtmod)) +(declare (uses clientmod)) +(declare (uses servermod)) (declare (uses commonmod)) (declare (uses commonmod.import)) (declare (uses dbfile)) (declare (uses dbfile.import)) ;; (declare (uses debugprint)) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -24,13 +24,13 @@ (declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) -(declare (uses server)) +(declare (uses servermod)) (declare (uses runs)) -(declare (uses rmt)) +(declare (uses rmtmod)) ;; (declare (uses filedb)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") ADDED mtserv.scm Index: mtserv.scm ================================================================== --- /dev/null +++ mtserv.scm @@ -0,0 +1,94 @@ +; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +;; (include "common.scm") +;; (include "megatest-version.scm") + +;; fake out readline usage of toplevel-command +(define (toplevel-command . a) #f) + +(use srfi-1 posix srfi-69 readline ;; regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras) + srfi-19 srfi-18 extras format pkts regex regex-case + (prefix dbi dbi:) + ) + +;; (declare (uses common)) +(declare (uses margs)) +(declare (uses configf)) +;; (declare (uses rmt)) + +;; (use ducttape-lib) +(include "megatest-version.scm") +(include "megatest-fossil-hash.scm") + +;; (require-library stml) + +(define help (conc " +mtutil, part of the Megatest tool suite, documentation at http://www.kiatoa.com/fossils/megatest + version " megatest-version " + license GPL, Copyright Matt Welland 2006-2017 + +Usage: mtutil action [options] + -h : this help + -manual : show the Megatest user manual + -version : print megatest version (currently " megatest-version ") + +Examples: + +Called as " (string-intersperse (argv) " ") " +Version " megatest-version ", built from " megatest-fossil-hash )) + ;; first token is our action, but only if no leading dash + +(define *action* (if (and (> (length (argv)) 1) + (not (string-match "^\\-.*" (cadr (argv))))) + (cadr (argv)) + #f)) + +(define *remargs* + (args:get-args + (if *action* (cdr (argv)) (argv)) + '("-log") + '("-h") + args:arg-hash + 0)) + +(if (args:get-arg "-h") + (begin + (print help) + (exit))) + +(if (or (args:get-arg "-repl") + (args:get-arg "-load")) + (begin + (import extras) ;; might not be needed + ;; (import csi) + (import readline) + (import apropos) + ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ... + + (install-history-file (get-environment-variable "HOME") ".mtutil_history") ;; [homedir] [filename] [nlines]) + (current-input-port (make-readline-port "mtutil> ")) + (if (args:get-arg "-repl") + (repl) + (load (args:get-arg "-load"))))) + +#| +(define mtconf (car (simple-setup #f))) +(define dat (common:with-queue-db mtconf (lambda (conn)(get-pkts conn '())))) +(pp (pkts#flatten-all dat '((cmd . ((parent . P)(url . M)))(runtype . ((parent . P)))) 'id 'group-id 'uuid 'parent 'pkt-type 'pkt 'processed)) +|# Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -22,1056 +22,1058 @@ (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (declare (uses dbfile)) -(include "common_records.scm") -;; (declare (uses rmtmod)) - -(import dbfile) ;; rmtmod) - -;; -;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! -;; - -;; generate entries for ~/.megatestrc with the following -;; -;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u - -;;====================================================================== -;; S U P P O R T F U N C T I O N S -;;====================================================================== - -;; if a server is either running or in the process of starting call client:setup -;; else return #f to let the calling proc know that there is no server available -;; -(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down. - (let* ((cinfo (if (remote? runremote) - (remote-conndat runremote) - #f))) - (if cinfo - cinfo - (if (server:check-if-running areapath) - (client:setup areapath runremote) - #f)))) - -(define (rmt:on-homehost? runremote) - (let* ((hh-dat (remote-hh-dat runremote))) - (if (pair? hh-dat) - (cdr hh-dat) - (begin - (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) - #f)))) - - -;;====================================================================== - -(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id - -;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) -;; -(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - - #;(common:telemetry-log (conc "rmt:"(->string cmd)) - payload: `((rid . ,rid) - (params . ,params))) - - (if (> attemptnum 2) - (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) - - (cond - ((> attemptnum 2) (thread-sleep! 0.05)) - ((> attemptnum 10) (thread-sleep! 0.5)) - ((> attemptnum 20) (thread-sleep! 1))) - (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) - (begin (server:run *toppath*) (thread-sleep! 3))) - - - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } - ;; do all the prep locked under the rmt-mutex - (mutex-lock! *rmt-mutex*) - - ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote - ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. - ;; 3. do the query, if on homehost use local access - ;; - (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value - (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas - (runremote (or area-dat - *runremote*)) - (attemptnum (+ 1 attemptnum)) - (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) - - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; - ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; - ;; ensure we have a homehost record - (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost - (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little - (let ((hh-data (server:choose-server areapath 'homehost))) - (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) - - ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) - (cond - #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds - (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") - (set! *runremote* #f) - ;; BUG: close-connections should go here? - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) - - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } - ;; give up if more than 150 attempts - ((> attemptnum 150) - (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") - (exit 1)) - - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; - ;; readonly mode, read request- handle it - case 2 - ((and readonly-mode - (member cmd api:read-only-queries)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") - (rmt:open-qry-close-locally cmd 0 params) - ) - - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; - ;; readonly mode, write request. Do nothing, return #f - (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) - - ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. - ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. - ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) - ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; - ;; reset the connection if it has been unused too long - ((and runremote - ;; (remote-conndat runremote) - (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on - (+ (remote-last-access runremote) - (remote-server-timeout runremote)))) - (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") - (http-transport:close-connections runremote) - ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections - ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; - - ;; on homehost and this is a read - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (rmt:on-homehost? runremote) - (member cmd api:read-only-queries)) ;; this is a read - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; - ;; on homehost and this is a write, we already have a server, but server has died - - ;; reinstate this keep-alive section but inject a time condition into the (add ... - ;; - ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost - ;; (not (member cmd api:read-only-queries)) ;; this is a write - ;; (remote-server-url runremote) ;; have a server - ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") - ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up - ;; (set! *runremote* (make-remote)) - ;; (let* ((server-info (remote-server-info *runremote*))) - ;; (if server-info - ;; (begin - ;; (remote-server-url-set! *runremote* (server:record->url server-info)) - ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) - ;; (remote-force-server-set! runremote (common:force-server?)) - ;; (mutex-unlock! *rmt-mutex*) - ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; - ;; on homehost and this is a write, we already have a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write - (remote-server-url runremote)) ;; have a server - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; - ;; on homehost, no server contact made and this is a write, passively start a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; have homehost - (not (remote-server-url runremote)) ;; no connection yet - (not (member cmd api:read-only-queries))) ;; not a read-only query - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") - (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call - (if server-info - (begin - (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed - (remote-server-id-set! runremote (server:record->id server-info))) - (if (common:force-server?) - (server:start-and-wait *toppath*) - (server:kind-run *toppath*))) - (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") - (rmt:open-qry-close-locally cmd 0 params))) - - ;;DOT CASE9 [label="force server\nnot on homehost"]; - ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; - ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; - ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one - (not (remote-conndat runremote))) - (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost - (not (remote-conndat runremote)))) ;; and no connection - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) - (mutex-unlock! *rmt-mutex*) - (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? - (server:start-and-wait *toppath*)) - (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http - (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as - - ;;DOT CASE10 [label="on homehost"]; - ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; - ;;DOT CASE10 -> "rmt:open-qry-close-locally"; - ;; all set up if get this far, dispatch the query - ((and (not (remote-force-server runremote)) - (cdr (remote-hh-dat runremote))) ;; we are on homehost - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") - (rmt:open-qry-close-locally cmd (if rid rid 0) params)) - - ;;DOT CASE11 [label="send_receive"]; - ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; - ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; - ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; - ;; not on homehost, do server query - (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) - ;;DOT } - -;; bunch of small functions factored out of send-receive to make debug easier -;; - -(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) - ;; (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") - ;; (mutex-lock! *rmt-mutex*) - (let* ((conninfo (remote-conndat runremote)) - (dat-in (condition-case ;; handling here has - ;; caused a lot of - ;; problems. However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:client-api-send-receive 0 runremote cmd params) - ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote) - ((servermismatch) (vector #f "Server id mismatch" )) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (dat (if (and (vector? dat-in) ;; ... check it is a correct size - (> (vector-length dat-in) 1)) - dat-in - (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) - (success (if (vector? dat) (vector-ref dat 0) #f)) - (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (remote-last-access-set! runremote (current-seconds)) ;; refresh access time - (begin - (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) - (set! conninfo #f) - (http-transport:close-connections runremote))) - (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) - (mutex-unlock! *rmt-mutex*) - (if success ;; success only tells us that the transport was - ;; successful, have to examine the data to see if - ;; there was a detected issue at the other end - (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (begin - (debug:print-error 0 *default-log-port* " dat=" dat) - (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) - ))) - -(define (rmt:print-db-stats) - (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" - (debug:print 18 *default-log-port* "DB Stats\n========") - (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) - (for-each (lambda (cmd) - (let ((cmd-dat (hash-table-ref *db-stats* cmd))) - (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) - (sort (hash-table-keys *db-stats*) - (lambda (a b) - (> (vector-ref (hash-table-ref *db-stats* a) 0) - (vector-ref (hash-table-ref *db-stats* b) 0))))))) - -(define (rmt:get-max-query-average run-id) - (mutex-lock! *db-stats-mutex*) - (let* ((runkey (conc "run-id=" run-id " ")) - (cmds (filter (lambda (x) - (substring-index runkey x)) - (hash-table-keys *db-stats*))) - (res (if (null? cmds) - (cons 'none 0) - (let loop ((cmd (car cmds)) - (tal (cdr cmds)) - (max-cmd (car cmds)) - (res 0)) - (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) - (tot (vector-ref cmd-dat 0)) - (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction - (currmax (max res curravg)) - (newmax-cmd (if (> curravg res) cmd max-cmd))) - (if (null? tal) - (if (> tot 10) - (cons newmax-cmd currmax) - (cons 'none 0)) - (loop (car tal)(cdr tal) newmax-cmd currmax))))))) - (mutex-unlock! *db-stats-mutex*) - res)) - -(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((qry-is-write (not (member cmd api:read-only-queries))) - (db-file-path (db:dbfile-path)) ;; 0)) - (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) - (read-only (not (file-write-access? db-file-path))) - (start (current-milliseconds)) - (resdat (if (not (and read-only qry-is-write)) - (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) - ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - ;; exn ;; This is an attempt to detect that situation and recover gracefully - ;; (begin - ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy - (if (and (vector? v) - (> (vector-length v) 1)) - (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) - newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '()))) ;; ) ;; we could also check that the returned types are valid - (vector #t '()))) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1)) - (duration (- (current-milliseconds) start))) - (if (and read-only qry-is-write) - (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) - (if (not success) - (if (> remretries 0) - (begin - (debug:print-error 0 *default-log-port* "local query failed. Trying again.") - (thread-sleep! (/ (random 5000) 1000)) ;; some random delay - (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) - (begin - (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") - #f)) - (begin - ;; (rmt:update-db-stats run-id cmd params duration) - ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it - (if qry-is-write - (let ((start-time (current-seconds))) - (mutex-lock! *db-multi-sync-mutex*) -/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) - (mutex-unlock! *db-multi-sync-mutex*))))) - res)) - -(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params) - (let* ((run-id (if run-id run-id 0)) - (res (http-transport:client-api-send-receive run-id runremote cmd params))) - (if (and res (vector-ref res 0)) - (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! - #f))) - -;;====================================================================== -;; -;; A C T U A L A P I C A L L S -;; -;;====================================================================== - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -(define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server run-id (list run-id))) - -(define (rmt:start-server run-id) - (rmt:send-receive 'start-server 0 (list run-id))) - -;;====================================================================== -;; M I S C -;;====================================================================== - -(define (rmt:login run-id) - (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) - -;; This login does no retries under the hood - it acts a bit like a ping. -;; Deprecated for nmsg-transport. -;; -(define (rmt:login-no-auto-client-setup runremote) - (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature)))) - -;; hand off a call to one of the db:queries statements -;; added run-id to make looking up the correct db possible -;; -(define (rmt:general-call stmtname run-id . params) - (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) - - -;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host -(define (rmt:get-latest-host-load hostname) - (rmt:send-receive 'get-latest-host-load 0 (list hostname))) - -(define (rmt:sdb-qry qry val run-id) - ;; add caching if qry is 'getid or 'getstr - (rmt:send-receive 'sdb-qry run-id (list qry val))) - -;; NOT COMPLETED -(define (rmt:runtests user run-id testpatt params) - (rmt:send-receive 'runtests run-id testpatt)) - -(define (rmt:get-run-record-ids target run keynames test-patt) - (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt))) - -(define (rmt:get-changed-record-ids since-time) - (rmt:send-receive 'get-changed-record-ids #f (list since-time)) ) - -(define (rmt:drop-all-triggers) - (rmt:send-receive 'drop-all-triggers #f '())) - -(define (rmt:create-all-triggers) - (rmt:send-receive 'create-all-triggers #f '())) - -;;====================================================================== -;; T E S T M E T A -;;====================================================================== - -(define (rmt:get-tests-tags) - (rmt:send-receive 'get-tests-tags #f '())) - -;;====================================================================== -;; K E Y S -;;====================================================================== - -;; These require run-id because the values come from the run! -;; -(define (rmt:get-key-val-pairs run-id) - (rmt:send-receive 'get-key-val-pairs run-id (list run-id))) - -(define (rmt:get-keys) - (if *db-keys* *db-keys* - (let ((res (rmt:send-receive 'get-keys #f '()))) - (set! *db-keys* res) - res))) - -(define (rmt:get-keys-write) ;; dummy query to force server start - (let ((res (rmt:send-receive 'get-keys-write #f '()))) - (set! *db-keys* res) - res)) - -;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe -;; to cache the resuls in a hash -;; -(define (rmt:get-key-vals run-id) - (or (hash-table-ref/default *keyvals* run-id #f) - (let ((res (rmt:send-receive 'get-key-vals #f (list run-id)))) - (hash-table-set! *keyvals* run-id res) - res))) - -(define (rmt:get-targets) - (rmt:send-receive 'get-targets #f '())) - -(define (rmt:get-target run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-target run-id (list run-id))) - -(define (rmt:get-run-times runpatt targetpatt) - (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) - - -;;====================================================================== -;; T E S T S -;;====================================================================== - -;; Just some syntatic sugar -(define (rmt:register-test run-id test-name item-path) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:general-call 'register-test run-id run-id test-name item-path)) - -(define (rmt:get-test-id run-id testname item-path) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) - -;; run-id is NOT used -;; -(define (rmt:get-test-info-by-id run-id test-id) - (if (number? test-id) - (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) - (begin - (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) - (print-call-chain (current-error-port)) - #f))) - -(define (rmt:test-get-rundir-from-test-id run-id test-id) - (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) - -(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) - (assert (number? run-id) "FATAL: Run id required.") - (let* ((test-path (if (string? work-area) - work-area - (rmt:test-get-rundir-from-test-id run-id test-id)))) - (debug:print 3 *default-log-port* "TEST PATH: " test-path) - (open-test-db test-path))) - -;; WARNING: This currently bypasses the transaction wrapped writes system -(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment))) - -(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) - -(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - (assert (number? run-id) "FATAL: Run id required.") - ;; (if (number? run-id) - (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))) - ;; (begin - ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) - ;; (print-call-chain (current-error-port)) - ;; '()))) - -(define (rmt:get-tests-for-run-state-status run-id testpatt last-update) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update))) - -;; get stuff via synchash -(define (rmt:synchash-get run-id proc synckey keynum params) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params))) - -(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) - -;; IDEA: Threadify these - they spend a lot of time waiting ... -;; -(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) - (let ((multi-run-mutex (make-mutex)) - (run-id-list (if run-ids - run-ids - (rmt:get-all-run-ids))) - (result '())) - (if (null? run-id-list) - '() - (let loop ((hed (car run-id-list)) - (tal (cdr run-id-list)) - (threads '())) - (if (> (length threads) 5) - (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads)) - (let* ((newthread (make-thread - (lambda () - (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) - (if (list? res) - (begin - (mutex-lock! multi-run-mutex) - (set! result (append result res)) - (mutex-unlock! multi-run-mutex)) - (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in)))) - (conc "multi-run-thread for run-id " hed))) - (newthreads (cons newthread threads))) - (thread-start! newthread) - (thread-sleep! 0.05) ;; give that thread some time to start - (if (null? tal) - newthreads - (loop (car tal)(cdr tal) newthreads)))))) - result)) - + +;; (include "common_records.scm") +;; ;; (declare (uses rmtmod)) +;; +;; (import dbfile) ;; rmtmod) +;; +;; ;; +;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! +;; ;; +;; +;; ;; generate entries for ~/.megatestrc with the following +;; ;; +;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u +;; +;; ;;====================================================================== +;; ;; S U P P O R T F U N C T I O N S +;; ;;====================================================================== +;; +;; ;; if a server is either running or in the process of starting call client:setup +;; ;; else return #f to let the calling proc know that there is no server available +;; ;; +;; (define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down. +;; (let* ((cinfo (if (remote? runremote) +;; (remote-conndat runremote) +;; #f))) +;; (if cinfo +;; cinfo +;; (if (server:check-if-running areapath) +;; (client:setup areapath runremote) +;; #f)))) +;; +;; (define (rmt:on-homehost? runremote) +;; (let* ((hh-dat (remote-hh-dat runremote))) +;; (if (pair? hh-dat) +;; (cdr hh-dat) +;; (begin +;; (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) +;; #f)))) +;; +;; +;; ;;====================================================================== +;; +;; (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id +;; +;; ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) +;; ;; +;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; +;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) +;; payload: `((rid . ,rid) +;; (params . ,params))) +;; +;; (if (> attemptnum 2) +;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) +;; +;; (cond +;; ((> attemptnum 2) (thread-sleep! 0.05)) +;; ((> attemptnum 10) (thread-sleep! 0.5)) +;; ((> attemptnum 20) (thread-sleep! 1))) +;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) +;; (begin (server:run *toppath*) (thread-sleep! 3))) +;; +;; +;; ;;DOT digraph megatest_state_status { +;; ;;DOT ranksep=0; +;; ;;DOT // rankdir=LR; +;; ;;DOT node [shape="box"]; +;; ;;DOT "rmt:send-receive" -> MUTEXLOCK; +;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } +;; ;; do all the prep locked under the rmt-mutex +;; (mutex-lock! *rmt-mutex*) +;; +;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote +;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. +;; ;; 3. do the query, if on homehost use local access +;; ;; +;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value +;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas +;; (runremote (or area-dat +;; *runremote*)) +;; (attemptnum (+ 1 attemptnum)) +;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) +;; +;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity +;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; +;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; +;; ;; ensure we have a record for our connection for given area +;; (if (not runremote) ;; can remove this one. should never get here. +;; (begin +;; (set! *runremote* (make-remote)) +;; (let* ((server-info (remote-server-info *runremote*))) +;; (if server-info +;; (begin +;; (remote-server-url-set! *runremote* (server:record->url server-info)) +;; (remote-server-id-set! *runremote* (server:record->id server-info))))) +;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration +;; +;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity +;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; +;; ;; DOT SET_HOMEHOST -> MUTEXLOCK; +;; ;; ensure we have a homehost record +;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost +;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little +;; (let ((hh-data (server:choose-server areapath 'homehost))) +;; (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) +;; +;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) +;; (cond +;; #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds +;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") +;; (set! *runremote* #f) +;; ;; BUG: close-connections should go here? +;; (mutex-unlock! *rmt-mutex*) +;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) +;; +;; ;;DOT EXIT; +;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } +;; ;; give up if more than 150 attempts +;; ((> attemptnum 150) +;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") +;; (exit 1)) +;; +;; ;;DOT CASE2 [label="local\nreadonly\nquery"]; +;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} +;; ;;DOT CASE2 -> "rmt:open-qry-close-locally"; +;; ;; readonly mode, read request- handle it - case 2 +;; ((and readonly-mode +;; (member cmd api:read-only-queries)) +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") +;; (rmt:open-qry-close-locally cmd 0 params) +;; ) +;; +;; ;;DOT CASE3 [label="write in\nread-only mode"]; +;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} +;; ;;DOT CASE3 -> "#f"; +;; ;; readonly mode, write request. Do nothing, return #f +;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) +;; +;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. +;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. +;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) +;; ;; +;; ;;DOT CASE4 [label="reset\nconnection"]; +;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} +;; ;;DOT CASE4 -> "rmt:send-receive"; +;; ;; reset the connection if it has been unused too long +;; ((and runremote +;; ;; (remote-conndat runremote) +;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on +;; (+ (remote-last-access runremote) +;; (remote-server-timeout runremote)))) +;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") +;; (http-transport:close-connections runremote) +;; ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections +;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. +;; (mutex-unlock! *rmt-mutex*) +;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; +;; ;;DOT CASE5 [label="local\nread"]; +;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; +;; ;;DOT CASE5 -> "rmt:open-qry-close-locally"; +;; +;; ;; on homehost and this is a read +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (rmt:on-homehost? runremote) +;; (member cmd api:read-only-queries)) ;; this is a read +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") +;; (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ;;DOT CASE6 [label="init\nremote"]; +;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; +;; ;;DOT CASE6 -> "rmt:send-receive"; +;; ;; on homehost and this is a write, we already have a server, but server has died +;; +;; ;; reinstate this keep-alive section but inject a time condition into the (add ... +;; ;; +;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost +;; ;; (not (member cmd api:read-only-queries)) ;; this is a write +;; ;; (remote-server-url runremote) ;; have a server +;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. +;; ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") +;; ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up +;; ;; (set! *runremote* (make-remote)) +;; ;; (let* ((server-info (remote-server-info *runremote*))) +;; ;; (if server-info +;; ;; (begin +;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) +;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) +;; ;; (remote-force-server-set! runremote (common:force-server?)) +;; ;; (mutex-unlock! *rmt-mutex*) +;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") +;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; +;; ;;DOT CASE7 [label="homehost\nwrite"]; +;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; +;; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; +;; ;; on homehost and this is a write, we already have a server +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (cdr (remote-hh-dat runremote)) ;; on homehost +;; (not (member cmd api:read-only-queries)) ;; this is a write +;; (remote-server-url runremote)) ;; have a server +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") +;; (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ;;DOT CASE8 [label="force\nserver"]; +;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; +;; ;;DOT CASE8 -> "rmt:open-qry-close-locally"; +;; ;; on homehost, no server contact made and this is a write, passively start a server +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (cdr (remote-hh-dat runremote)) ;; have homehost +;; (not (remote-server-url runremote)) ;; no connection yet +;; (not (member cmd api:read-only-queries))) ;; not a read-only query +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") +;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call +;; (if server-info +;; (begin +;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed +;; (remote-server-id-set! runremote (server:record->id server-info))) +;; (if (common:force-server?) +;; (server:start-and-wait *toppath*) +;; (server:kind-run *toppath*))) +;; (remote-force-server-set! runremote (common:force-server?)) +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") +;; (rmt:open-qry-close-locally cmd 0 params))) +;; +;; ;;DOT CASE9 [label="force server\nnot on homehost"]; +;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; +;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; +;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one +;; (not (remote-conndat runremote))) +;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost +;; (not (remote-conndat runremote)))) ;; and no connection +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) +;; (mutex-unlock! *rmt-mutex*) +;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? +;; (server:start-and-wait *toppath*)) +;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http +;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as +;; +;; ;;DOT CASE10 [label="on homehost"]; +;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; +;; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; +;; ;; all set up if get this far, dispatch the query +;; ((and (not (remote-force-server runremote)) +;; (cdr (remote-hh-dat runremote))) ;; we are on homehost +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") +;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) +;; +;; ;;DOT CASE11 [label="send_receive"]; +;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; +;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; +;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; +;; ;; not on homehost, do server query +;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) +;; ;;DOT } +;; +;; ;; bunch of small functions factored out of send-receive to make debug easier +;; ;; +;; +;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) +;; ;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") +;; ;; (mutex-lock! *rmt-mutex*) +;; (let* ((conninfo (remote-conndat runremote)) +;; (dat-in (condition-case ;; handling here has +;; ;; caused a lot of +;; ;; problems. However it +;; ;; is needed to deal with +;; ;; attemtped +;; ;; communication to +;; ;; servers that have gone +;; ;; away +;; (http-transport:client-api-send-receive 0 runremote cmd params) +;; ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote) +;; ((servermismatch) (vector #f "Server id mismatch" )) +;; ((commfail)(vector #f "communications fail")) +;; ((exn)(vector #f "other fail" (print-call-chain))))) +;; (dat (if (and (vector? dat-in) ;; ... check it is a correct size +;; (> (vector-length dat-in) 1)) +;; dat-in +;; (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) +;; (success (if (vector? dat) (vector-ref dat 0) #f)) +;; (res (if (vector? dat) (vector-ref dat 1) #f))) +;; (if (and (vector? conninfo) (< 5 (vector-length conninfo))) +;; (remote-last-access-set! runremote (current-seconds)) ;; refresh access time +;; (begin +;; (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) +;; (set! conninfo #f) +;; (http-transport:close-connections runremote))) +;; (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) +;; (mutex-unlock! *rmt-mutex*) +;; (if success ;; success only tells us that the transport was +;; ;; successful, have to examine the data to see if +;; ;; there was a detected issue at the other end +;; (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) +;; (begin +;; (debug:print-error 0 *default-log-port* " dat=" dat) +;; (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) +;; ))) +;; +;; (define (rmt:print-db-stats) +;; (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" +;; (debug:print 18 *default-log-port* "DB Stats\n========") +;; (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) +;; (for-each (lambda (cmd) +;; (let ((cmd-dat (hash-table-ref *db-stats* cmd))) +;; (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) +;; (sort (hash-table-keys *db-stats*) +;; (lambda (a b) +;; (> (vector-ref (hash-table-ref *db-stats* a) 0) +;; (vector-ref (hash-table-ref *db-stats* b) 0))))))) +;; +;; (define (rmt:get-max-query-average run-id) +;; (mutex-lock! *db-stats-mutex*) +;; (let* ((runkey (conc "run-id=" run-id " ")) +;; (cmds (filter (lambda (x) +;; (substring-index runkey x)) +;; (hash-table-keys *db-stats*))) +;; (res (if (null? cmds) +;; (cons 'none 0) +;; (let loop ((cmd (car cmds)) +;; (tal (cdr cmds)) +;; (max-cmd (car cmds)) +;; (res 0)) +;; (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) +;; (tot (vector-ref cmd-dat 0)) +;; (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction +;; (currmax (max res curravg)) +;; (newmax-cmd (if (> curravg res) cmd max-cmd))) +;; (if (null? tal) +;; (if (> tot 10) +;; (cons newmax-cmd currmax) +;; (cons 'none 0)) +;; (loop (car tal)(cdr tal) newmax-cmd currmax))))))) +;; (mutex-unlock! *db-stats-mutex*) +;; res)) +;; +;; (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) +;; (let* ((qry-is-write (not (member cmd api:read-only-queries))) +;; (db-file-path (db:dbfile-path)) ;; 0)) +;; (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) +;; (read-only (not (file-write-access? db-file-path))) +;; (start (current-milliseconds)) +;; (resdat (if (not (and read-only qry-is-write)) +;; (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) +;; ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. +;; ;; exn ;; This is an attempt to detect that situation and recover gracefully +;; ;; (begin +;; ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) +;; ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy +;; (if (and (vector? v) +;; (> (vector-length v) 1)) +;; (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) +;; newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record +;; (vector #t '()))) ;; ) ;; we could also check that the returned types are valid +;; (vector #t '()))) +;; (success (vector-ref resdat 0)) +;; (res (vector-ref resdat 1)) +;; (duration (- (current-milliseconds) start))) +;; (if (and read-only qry-is-write) +;; (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) +;; (if (not success) +;; (if (> remretries 0) +;; (begin +;; (debug:print-error 0 *default-log-port* "local query failed. Trying again.") +;; (thread-sleep! (/ (random 5000) 1000)) ;; some random delay +;; (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) +;; (begin +;; (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") +;; #f)) +;; (begin +;; ;; (rmt:update-db-stats run-id cmd params duration) +;; ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it +;; (if qry-is-write +;; (let ((start-time (current-seconds))) +;; (mutex-lock! *db-multi-sync-mutex*) +;; / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) +;; (mutex-unlock! *db-multi-sync-mutex*))))) +;; res)) +;; +;; (define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params) +;; (let* ((run-id (if run-id run-id 0)) +;; (res (http-transport:client-api-send-receive run-id runremote cmd params))) +;; (if (and res (vector-ref res 0)) +;; (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! +;; #f))) +;; +;; ;;====================================================================== +;; ;; +;; ;; A C T U A L A P I C A L L S +;; ;; +;; ;;====================================================================== +;; +;; ;;====================================================================== +;; ;; S E R V E R +;; ;;====================================================================== +;; +;; (define (rmt:kill-server run-id) +;; (rmt:send-receive 'kill-server run-id (list run-id))) +;; +;; (define (rmt:start-server run-id) +;; (rmt:send-receive 'start-server 0 (list run-id))) +;; +;; ;;====================================================================== +;; ;; M I S C +;; ;;====================================================================== +;; +;; (define (rmt:login run-id) +;; (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) +;; +;; ;; This login does no retries under the hood - it acts a bit like a ping. +;; ;; Deprecated for nmsg-transport. +;; ;; +;; (define (rmt:login-no-auto-client-setup runremote) +;; (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature)))) +;; +;; ;; hand off a call to one of the db:queries statements +;; ;; added run-id to make looking up the correct db possible +;; ;; +;; (define (rmt:general-call stmtname run-id . params) +;; (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) +;; +;; +;; ;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host +;; (define (rmt:get-latest-host-load hostname) +;; (rmt:send-receive 'get-latest-host-load 0 (list hostname))) +;; +;; (define (rmt:sdb-qry qry val run-id) +;; ;; add caching if qry is 'getid or 'getstr +;; (rmt:send-receive 'sdb-qry run-id (list qry val))) +;; +;; ;; NOT COMPLETED +;; (define (rmt:runtests user run-id testpatt params) +;; (rmt:send-receive 'runtests run-id testpatt)) +;; +;; (define (rmt:get-run-record-ids target run keynames test-patt) +;; (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt))) +;; +;; (define (rmt:get-changed-record-ids since-time) +;; (rmt:send-receive 'get-changed-record-ids #f (list since-time)) ) +;; +;; (define (rmt:drop-all-triggers) +;; (rmt:send-receive 'drop-all-triggers #f '())) +;; +;; (define (rmt:create-all-triggers) +;; (rmt:send-receive 'create-all-triggers #f '())) +;; +;; ;;====================================================================== +;; ;; T E S T M E T A +;; ;;====================================================================== +;; +;; (define (rmt:get-tests-tags) +;; (rmt:send-receive 'get-tests-tags #f '())) +;; +;; ;;====================================================================== +;; ;; K E Y S +;; ;;====================================================================== +;; +;; ;; These require run-id because the values come from the run! +;; ;; +;; (define (rmt:get-key-val-pairs run-id) +;; (rmt:send-receive 'get-key-val-pairs run-id (list run-id))) +;; +;; (define (rmt:get-keys) +;; (if *db-keys* *db-keys* +;; (let ((res (rmt:send-receive 'get-keys #f '()))) +;; (set! *db-keys* res) +;; res))) +;; +;; (define (rmt:get-keys-write) ;; dummy query to force server start +;; (let ((res (rmt:send-receive 'get-keys-write #f '()))) +;; (set! *db-keys* res) +;; res)) +;; +;; ;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe +;; ;; to cache the resuls in a hash +;; ;; +;; (define (rmt:get-key-vals run-id) +;; (or (hash-table-ref/default *keyvals* run-id #f) +;; (let ((res (rmt:send-receive 'get-key-vals #f (list run-id)))) +;; (hash-table-set! *keyvals* run-id res) +;; res))) +;; +;; (define (rmt:get-targets) +;; (rmt:send-receive 'get-targets #f '())) +;; +;; (define (rmt:get-target run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-target run-id (list run-id))) +;; +;; (define (rmt:get-run-times runpatt targetpatt) +;; (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) +;; +;; +;; ;;====================================================================== +;; ;; T E S T S +;; ;;====================================================================== +;; +;; ;; Just some syntatic sugar +;; (define (rmt:register-test run-id test-name item-path) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:general-call 'register-test run-id run-id test-name item-path)) +;; +;; (define (rmt:get-test-id run-id testname item-path) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) +;; +;; ;; run-id is NOT used +;; ;; +;; (define (rmt:get-test-info-by-id run-id test-id) +;; (if (number? test-id) +;; (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) +;; (print-call-chain (current-error-port)) +;; #f))) +;; +;; (define (rmt:test-get-rundir-from-test-id run-id test-id) +;; (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) +;; +;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (let* ((test-path (if (string? work-area) +;; work-area +;; (rmt:test-get-rundir-from-test-id run-id test-id)))) +;; (debug:print 3 *default-log-port* "TEST PATH: " test-path) +;; (open-test-db test-path))) +;; +;; ;; WARNING: This currently bypasses the transaction wrapped writes system +;; (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment))) +;; +;; (define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) +;; +;; (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) +;; (assert (number? run-id) "FATAL: Run id required.") +;; ;; (if (number? run-id) +;; (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))) +;; ;; (begin +;; ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) +;; ;; (print-call-chain (current-error-port)) +;; ;; '()))) +;; +;; (define (rmt:get-tests-for-run-state-status run-id testpatt last-update) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update))) +;; +;; ;; get stuff via synchash +;; (define (rmt:synchash-get run-id proc synckey keynum params) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params))) +;; +;; (define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) +;; ;; ;; IDEA: Threadify these - they spend a lot of time waiting ... ;; ;; ;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) -;; (let ((run-id-list (if run-ids +;; (let ((multi-run-mutex (make-mutex)) +;; (run-id-list (if run-ids ;; run-ids -;; (rmt:get-all-run-ids)))) -;; (apply append (map (lambda (run-id) -;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in))) -;; run-id-list)))) - -(define (rmt:delete-test-records run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) - -(define (rmt:test-set-state-status run-id test-id state status msg) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) - -(define (rmt:test-toplevel-num-items run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) - -;; (define (rmt:get-previous-test-run-record run-id test-name item-path) -;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) - -(define (rmt:get-matching-previous-test-run-records run-id test-name item-path) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) - -(define (rmt:test-get-logfile-info run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name))) - -(define (rmt:test-get-records-for-index-file run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name))) - -(define (rmt:get-testinfo-state-status run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) - -(define (rmt:test-set-log! run-id test-id logf) - (assert (number? run-id) "FATAL: Run id required.") - (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id))) - -(define (rmt:test-set-top-process-pid run-id test-id pid) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid))) - -(define (rmt:test-get-top-process-pid run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id))) - -(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) - (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) - -;; NOTE: This will open and access ALL run databases. -;; -(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname) - (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt))) - (apply append - (map (lambda (run-id) - (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) - run-ids)))) - -(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) - -(define (rmt:get-count-tests-running-for-run-id run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) - -(define (rmt:get-not-completed-cnt run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) - - -;; Statistical queries - -(define (rmt:get-count-tests-running run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running run-id (list run-id))) - -(define (rmt:get-count-tests-running-for-testname run-id testname) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) - -(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) - -;; state and status are extra hints not usually used in the calculation -;; -(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) - -(define (rmt:set-state-status-and-roll-up-run run-id state status) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) - - -(define (rmt:update-pass-fail-counts run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) - -(define (rmt:top-test-set-per-pf-counts run-id test-name) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) - -(define (rmt:get-raw-run-stats run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-raw-run-stats run-id (list run-id))) - -(define (rmt:get-test-times runname target) - (rmt:send-receive 'get-test-times #f (list runname target ))) - -;;====================================================================== -;; R U N S -;;====================================================================== - -;; BUG - LOOK AT HOW THIS WORKS!!! -;; -(define (rmt:get-run-info run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-info #f (list run-id))) - -(define (rmt:get-num-runs runpatt) - (rmt:send-receive 'get-num-runs #f (list runpatt))) - -(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) - (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) - -;; Use the special run-id == #f scenario here since there is no run yet -(define (rmt:register-run keyvals runname state status user contour) - (rmt:send-receive 'register-run #f (list keyvals runname state status user contour))) - -(define (rmt:get-run-name-from-id run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-name-from-id #f (list run-id))) - -(define (rmt:delete-run run-id) - (rmt:send-receive 'delete-run #f (list run-id))) - -(define (rmt:update-run-stats run-id stats) - (rmt:send-receive 'update-run-stats #f (list run-id stats))) - -(define (rmt:delete-old-deleted-test-records) - (rmt:send-receive 'delete-old-deleted-test-records #f '())) - -(define (rmt:get-runs runpatt count offset keypatts) - (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts))) - -(define (rmt:simple-get-runs runpatt count offset target last-update) - (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update))) - -(define (rmt:get-all-run-ids) - (rmt:send-receive 'get-all-run-ids #f '())) - -(define (rmt:get-prev-run-ids run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-prev-run-ids #f (list run-id))) - -(define (rmt:lock/unlock-run run-id lock unlock user) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) - -;; set/get status -(define (rmt:get-run-status run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-status #f (list run-id))) - -(define (rmt:get-run-state run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-run-state #f (list run-id))) - - -(define (rmt:set-run-status run-id run-status #!key (msg #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) - -(define (rmt:set-run-state-status run-id state status ) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-run-state-status #f (list run-id state status))) - -(define (rmt:update-tesdata-on-repilcate-db old-lt new-lt) -(rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt))) - -(define (rmt:update-run-event_time run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'update-run-event_time #f (list run-id))) - -(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default - (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order))) - -(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) - (assert (number? run-id) "FATAL: Run id required.") - ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) - (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) - -(define (rmt:get-main-run-stats run-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-main-run-stats #f (list run-id))) - -(define (rmt:get-var varname) - (rmt:send-receive 'get-var #f (list varname))) - -(define (rmt:del-var varname) - (rmt:send-receive 'del-var #f (list varname))) - -(define (rmt:set-var varname value) - (rmt:send-receive 'set-var #f (list varname value))) - -(define (rmt:inc-var varname) - (rmt:send-receive 'inc-var #f (list varname))) - -(define (rmt:dec-var varname) - (rmt:send-receive 'dec-var #f (list varname))) - -(define (rmt:add-var varname value) - (rmt:send-receive 'add-var #f (list varname value))) - -;;====================================================================== -;; M U L T I R U N Q U E R I E S -;;====================================================================== - -;; Need to move this to multi-run section and make associated changes -(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (rmt:find-and-mark-incomplete run-id ovr-deadtime)) - run-ids))) - -;; get the previous record for when this test was run where all keys match but runname -;; returns #f if no such test found, returns a single test record if found -;; -;; Run this at the client end since we have to connect to multiple run-id dbs -;; -(define (rmt:get-previous-test-run-record run-id test-name item-path) - (let* ((keyvals (rmt:get-key-val-pairs run-id)) - (keys (rmt:get-keys)) - (selstr (string-intersperse keys ",")) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) - (if (not keyvals) - #f - (let ((prev-run-ids (rmt:get-prev-run-ids run-id))) - ;; for each run starting with the most recent look to see if there is a matching test - ;; if found then return that matching test record - (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) - (if (null? prev-run-ids) #f - (let loop ((hed (car prev-run-ids)) - (tal (cdr prev-run-ids))) - (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses - #f #f #f ;; offset limit not-in hide/not-hide - #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode - (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) - (if (and (null? results) - (not (null? tal))) - (loop (car tal)(cdr tal)) - (if (null? results) #f - (car results)))))))))) - -(define (rmt:get-run-stats) - (rmt:send-receive 'get-run-stats #f '())) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -;; Getting steps is more complicated. -;; -;; If given work area -;; 1. Find the testdat.db file -;; 2. Open the testdat.db file and do the query -;; If not given the work area -;; 1. Do a remote call to get the test path -;; 2. Continue as above -;; -;;(define (rmt:get-steps-for-test run-id test-id) -;; (rmt:send-receive 'get-steps-data run-id (list test-id))) - -(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) - (assert (number? run-id) "FATAL: Run id required.") - (let* ((state (items:check-valid-items "state" state-in)) - (status (items:check-valid-items "status" status-in))) - (if (or (not state)(not status)) - (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state") - " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config")) - (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile)))) - - -(define (rmt:delete-steps-for-test! run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id))) - -(define (rmt:get-steps-for-test run-id test-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id))) - -(define (rmt:get-steps-info-by-id run-id test-step-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id))) - -;;====================================================================== -;; T E S T D A T A -;;====================================================================== - -(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) - -(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f)) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt))) - -(define (rmt:get-data-info-by-id run-id test-data-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id))) - -(define (rmt:testmeta-add-record testname) - (rmt:send-receive 'testmeta-add-record #f (list testname))) - -(define (rmt:testmeta-get-record testname) - (rmt:send-receive 'testmeta-get-record #f (list testname))) - -(define (rmt:testmeta-update-field test-name fld val) - (rmt:send-receive 'testmeta-update-field #f (list test-name fld val))) - -(define (rmt:test-data-rollup run-id test-id status) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status))) - -(define (rmt:csv->test-data run-id test-id csvdata) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata))) - -;;====================================================================== -;; T A S K S -;;====================================================================== - -(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) - (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt))) - -(define (rmt:tasks-add action owner target runname testpatt params) - (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params))) - -(define (rmt:tasks-set-state-given-param-key param-key new-state) - (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state))) - -(define (rmt:tasks-get-last target runname) - (rmt:send-receive 'tasks-get-last #f (list target runname))) - -;;====================================================================== -;; N O S Y N C D B -;;====================================================================== - -(define (rmt:no-sync-set var val) - (rmt:send-receive 'no-sync-set #f `(,var ,val))) - -(define (rmt:no-sync-get/default var default) - (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) - -(define (rmt:no-sync-del! var) - (rmt:send-receive 'no-sync-del! #f `(,var))) - -(define (rmt:no-sync-get-lock keyname) - (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) - -;;====================================================================== -;; A R C H I V E S -;;====================================================================== - -(define (rmt:archive-get-allocations testname itempath dneeded) - (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) - -(define (rmt:archive-register-block-name bdisk-id archive-path) - (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path))) - -(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) - (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey))) - -(define (rmt:archive-register-disk bdisk-name bdisk-path df) - (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df))) - -(define (rmt:test-set-archive-block-id run-id test-id archive-block-id) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) - -(define (rmt:test-get-archive-block-info archive-block-id) - (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) - - -(define (rmtmod:calc-ro-mode runremote *toppath*) - (if (and runremote - (remote-ro-mode-checked runremote)) - (remote-ro-mode runremote) - (let* ((mtcfgfile (conc *toppath* "/megatest.config")) - (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (remote-ro-mode-set! runremote ro-mode) - (remote-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))) - -(define (extras-readonly-mode rmt-mutex log-port cmd params) - (mutex-unlock! rmt-mutex) - (debug:print-info 12 log-port "rmt:send-receive, case 3") - (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) - #f) - -(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) - (mutex-lock! *rmt-mutex*) - (http-transport:close-connections runremote) - (remote-server-url-set! runremote #f) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - -(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (if (and (vector? res) - (eq? (vector-length res) 2) - (eq? (vector-ref res 1) 'overloaded)) ;; since we are - ;; looking at the - ;; data to carry the - ;; error we'll use a - ;; fairly obtuse - ;; combo to minimise - ;; the chances of - ;; some sort of - ;; collision. this - ;; is the case where - ;; the returned data - ;; is bad or the - ;; server is - ;; overloaded and we - ;; want to ease off - ;; the queries - (let ((wait-delay (+ attemptnum (* attemptnum 10)))) - (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") - (mutex-lock! *rmt-mutex*) - (http-transport:close-connections runremote) - (set! *runremote* #f) ;; force starting over - (mutex-unlock! *rmt-mutex*) - (thread-sleep! wait-delay) - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - res)) ;; All good, return res - -#;(set-functions rmt:send-receive remote-server-url-set! - http-transport:close-connections remote-conndat-set! - debug:print debug:print-info - remote-ro-mode remote-ro-mode-set! - remote-ro-mode-checked-set! remote-ro-mode-checked) +;; (rmt:get-all-run-ids))) +;; (result '())) +;; (if (null? run-id-list) +;; '() +;; (let loop ((hed (car run-id-list)) +;; (tal (cdr run-id-list)) +;; (threads '())) +;; (if (> (length threads) 5) +;; (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads)) +;; (let* ((newthread (make-thread +;; (lambda () +;; (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) +;; (if (list? res) +;; (begin +;; (mutex-lock! multi-run-mutex) +;; (set! result (append result res)) +;; (mutex-unlock! multi-run-mutex)) +;; (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in)))) +;; (conc "multi-run-thread for run-id " hed))) +;; (newthreads (cons newthread threads))) +;; (thread-start! newthread) +;; (thread-sleep! 0.05) ;; give that thread some time to start +;; (if (null? tal) +;; newthreads +;; (loop (car tal)(cdr tal) newthreads)))))) +;; result)) +;; +;; ;; ;; IDEA: Threadify these - they spend a lot of time waiting ... +;; ;; ;; +;; ;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) +;; ;; (let ((run-id-list (if run-ids +;; ;; run-ids +;; ;; (rmt:get-all-run-ids)))) +;; ;; (apply append (map (lambda (run-id) +;; ;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in))) +;; ;; run-id-list)))) +;; +;; (define (rmt:delete-test-records run-id test-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) +;; +;; (define (rmt:test-set-state-status run-id test-id state status msg) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) +;; +;; (define (rmt:test-toplevel-num-items run-id test-name) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) +;; +;; ;; (define (rmt:get-previous-test-run-record run-id test-name item-path) +;; ;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) +;; +;; (define (rmt:get-matching-previous-test-run-records run-id test-name item-path) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) +;; +;; (define (rmt:test-get-logfile-info run-id test-name) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name))) +;; +;; (define (rmt:test-get-records-for-index-file run-id test-name) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name))) +;; +;; (define (rmt:get-testinfo-state-status run-id test-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) +;; +;; (define (rmt:test-set-log! run-id test-id logf) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id))) +;; +;; (define (rmt:test-set-top-process-pid run-id test-id pid) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid))) +;; +;; (define (rmt:test-get-top-process-pid run-id test-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id))) +;; +;; (define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) +;; (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) +;; +;; ;; NOTE: This will open and access ALL run databases. +;; ;; +;; (define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname) +;; (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt))) +;; (apply append +;; (map (lambda (run-id) +;; (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) +;; run-ids)))) +;; +;; (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) +;; +;; (define (rmt:get-count-tests-running-for-run-id run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) +;; +;; (define (rmt:get-not-completed-cnt run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) +;; +;; +;; ;; Statistical queries +;; +;; (define (rmt:get-count-tests-running run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-count-tests-running run-id (list run-id))) +;; +;; (define (rmt:get-count-tests-running-for-testname run-id testname) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) +;; +;; (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) +;; +;; ;; state and status are extra hints not usually used in the calculation +;; ;; +;; (define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) +;; +;; (define (rmt:set-state-status-and-roll-up-run run-id state status) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) +;; +;; +;; (define (rmt:update-pass-fail-counts run-id test-name) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) +;; +;; (define (rmt:top-test-set-per-pf-counts run-id test-name) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) +;; +;; (define (rmt:get-raw-run-stats run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-raw-run-stats run-id (list run-id))) +;; +;; (define (rmt:get-test-times runname target) +;; (rmt:send-receive 'get-test-times #f (list runname target ))) +;; +;; ;;====================================================================== +;; ;; R U N S +;; ;;====================================================================== +;; +;; ;; BUG - LOOK AT HOW THIS WORKS!!! +;; ;; +;; (define (rmt:get-run-info run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-run-info #f (list run-id))) +;; +;; (define (rmt:get-num-runs runpatt) +;; (rmt:send-receive 'get-num-runs #f (list runpatt))) +;; +;; (define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) +;; (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) +;; +;; ;; Use the special run-id == #f scenario here since there is no run yet +;; (define (rmt:register-run keyvals runname state status user contour) +;; (rmt:send-receive 'register-run #f (list keyvals runname state status user contour))) +;; +;; (define (rmt:get-run-name-from-id run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-run-name-from-id #f (list run-id))) +;; +;; (define (rmt:delete-run run-id) +;; (rmt:send-receive 'delete-run #f (list run-id))) +;; +;; (define (rmt:update-run-stats run-id stats) +;; (rmt:send-receive 'update-run-stats #f (list run-id stats))) +;; +;; (define (rmt:delete-old-deleted-test-records) +;; (rmt:send-receive 'delete-old-deleted-test-records #f '())) +;; +;; (define (rmt:get-runs runpatt count offset keypatts) +;; (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts))) +;; +;; (define (rmt:simple-get-runs runpatt count offset target last-update) +;; (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update))) +;; +;; (define (rmt:get-all-run-ids) +;; (rmt:send-receive 'get-all-run-ids #f '())) +;; +;; (define (rmt:get-prev-run-ids run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-prev-run-ids #f (list run-id))) +;; +;; (define (rmt:lock/unlock-run run-id lock unlock user) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) +;; +;; ;; set/get status +;; (define (rmt:get-run-status run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-run-status #f (list run-id))) +;; +;; (define (rmt:get-run-state run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-run-state #f (list run-id))) +;; +;; +;; (define (rmt:set-run-status run-id run-status #!key (msg #f)) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) +;; +;; (define (rmt:set-run-state-status run-id state status ) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'set-run-state-status #f (list run-id state status))) +;; +;; (define (rmt:update-tesdata-on-repilcate-db old-lt new-lt) +;; (rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt))) +;; +;; (define (rmt:update-run-event_time run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'update-run-event_time #f (list run-id))) +;; +;; (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default +;; (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order))) +;; +;; (define (rmt:find-and-mark-incomplete run-id ovr-deadtime) +;; (assert (number? run-id) "FATAL: Run id required.") +;; ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) +;; (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) +;; +;; (define (rmt:get-main-run-stats run-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-main-run-stats #f (list run-id))) +;; +;; (define (rmt:get-var varname) +;; (rmt:send-receive 'get-var #f (list varname))) +;; +;; (define (rmt:del-var varname) +;; (rmt:send-receive 'del-var #f (list varname))) +;; +;; (define (rmt:set-var varname value) +;; (rmt:send-receive 'set-var #f (list varname value))) +;; +;; (define (rmt:inc-var varname) +;; (rmt:send-receive 'inc-var #f (list varname))) +;; +;; (define (rmt:dec-var varname) +;; (rmt:send-receive 'dec-var #f (list varname))) +;; +;; (define (rmt:add-var varname value) +;; (rmt:send-receive 'add-var #f (list varname value))) +;; +;; ;;====================================================================== +;; ;; M U L T I R U N Q U E R I E S +;; ;;====================================================================== +;; +;; ;; Need to move this to multi-run section and make associated changes +;; (define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) +;; (let ((run-ids (rmt:get-all-run-ids))) +;; (for-each (lambda (run-id) +;; (rmt:find-and-mark-incomplete run-id ovr-deadtime)) +;; run-ids))) +;; +;; ;; get the previous record for when this test was run where all keys match but runname +;; ;; returns #f if no such test found, returns a single test record if found +;; ;; +;; ;; Run this at the client end since we have to connect to multiple run-id dbs +;; ;; +;; (define (rmt:get-previous-test-run-record run-id test-name item-path) +;; (let* ((keyvals (rmt:get-key-val-pairs run-id)) +;; (keys (rmt:get-keys)) +;; (selstr (string-intersperse keys ",")) +;; (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) +;; (if (not keyvals) +;; #f +;; (let ((prev-run-ids (rmt:get-prev-run-ids run-id))) +;; ;; for each run starting with the most recent look to see if there is a matching test +;; ;; if found then return that matching test record +;; (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) +;; (if (null? prev-run-ids) #f +;; (let loop ((hed (car prev-run-ids)) +;; (tal (cdr prev-run-ids))) +;; (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses +;; #f #f #f ;; offset limit not-in hide/not-hide +;; #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode +;; (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) +;; (if (and (null? results) +;; (not (null? tal))) +;; (loop (car tal)(cdr tal)) +;; (if (null? results) #f +;; (car results)))))))))) +;; +;; (define (rmt:get-run-stats) +;; (rmt:send-receive 'get-run-stats #f '())) +;; +;; ;;====================================================================== +;; ;; S T E P S +;; ;;====================================================================== +;; +;; ;; Getting steps is more complicated. +;; ;; +;; ;; If given work area +;; ;; 1. Find the testdat.db file +;; ;; 2. Open the testdat.db file and do the query +;; ;; If not given the work area +;; ;; 1. Do a remote call to get the test path +;; ;; 2. Continue as above +;; ;; +;; ;;(define (rmt:get-steps-for-test run-id test-id) +;; ;; (rmt:send-receive 'get-steps-data run-id (list test-id))) +;; +;; (define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (let* ((state (items:check-valid-items "state" state-in)) +;; (status (items:check-valid-items "status" status-in))) +;; (if (or (not state)(not status)) +;; (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state") +;; " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config")) +;; (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile)))) +;; +;; +;; (define (rmt:delete-steps-for-test! run-id test-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id))) +;; +;; (define (rmt:get-steps-for-test run-id test-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id))) +;; +;; (define (rmt:get-steps-info-by-id run-id test-step-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id))) +;; +;; ;;====================================================================== +;; ;; T E S T D A T A +;; ;;====================================================================== +;; +;; (define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) +;; +;; (define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f)) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt))) +;; +;; (define (rmt:get-data-info-by-id run-id test-data-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id))) +;; +;; (define (rmt:testmeta-add-record testname) +;; (rmt:send-receive 'testmeta-add-record #f (list testname))) +;; +;; (define (rmt:testmeta-get-record testname) +;; (rmt:send-receive 'testmeta-get-record #f (list testname))) +;; +;; (define (rmt:testmeta-update-field test-name fld val) +;; (rmt:send-receive 'testmeta-update-field #f (list test-name fld val))) +;; +;; (define (rmt:test-data-rollup run-id test-id status) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status))) +;; +;; (define (rmt:csv->test-data run-id test-id csvdata) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata))) +;; +;; ;;====================================================================== +;; ;; T A S K S +;; ;;====================================================================== +;; +;; (define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) +;; (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt))) +;; +;; (define (rmt:tasks-add action owner target runname testpatt params) +;; (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params))) +;; +;; (define (rmt:tasks-set-state-given-param-key param-key new-state) +;; (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state))) +;; +;; (define (rmt:tasks-get-last target runname) +;; (rmt:send-receive 'tasks-get-last #f (list target runname))) +;; +;; ;;====================================================================== +;; ;; N O S Y N C D B +;; ;;====================================================================== +;; +;; (define (rmt:no-sync-set var val) +;; (rmt:send-receive 'no-sync-set #f `(,var ,val))) +;; +;; (define (rmt:no-sync-get/default var default) +;; (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) +;; +;; (define (rmt:no-sync-del! var) +;; (rmt:send-receive 'no-sync-del! #f `(,var))) +;; +;; (define (rmt:no-sync-get-lock keyname) +;; (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) +;; +;; ;;====================================================================== +;; ;; A R C H I V E S +;; ;;====================================================================== +;; +;; (define (rmt:archive-get-allocations testname itempath dneeded) +;; (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) +;; +;; (define (rmt:archive-register-block-name bdisk-id archive-path) +;; (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path))) +;; +;; (define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) +;; (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey))) +;; +;; (define (rmt:archive-register-disk bdisk-name bdisk-path df) +;; (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df))) +;; +;; (define (rmt:test-set-archive-block-id run-id test-id archive-block-id) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) +;; +;; (define (rmt:test-get-archive-block-info archive-block-id) +;; (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) +;; +;; +;; (define (rmtmod:calc-ro-mode runremote *toppath*) +;; (if (and runremote +;; (remote-ro-mode-checked runremote)) +;; (remote-ro-mode runremote) +;; (let* ((mtcfgfile (conc *toppath* "/megatest.config")) +;; (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future +;; (if runremote +;; (begin +;; (remote-ro-mode-set! runremote ro-mode) +;; (remote-ro-mode-checked-set! runremote #t) +;; ro-mode) +;; ro-mode)))) +;; +;; (define (extras-readonly-mode rmt-mutex log-port cmd params) +;; (mutex-unlock! rmt-mutex) +;; (debug:print-info 12 log-port "rmt:send-receive, case 3") +;; (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) +;; #f) +;; +;; (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) +;; (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) +;; (mutex-lock! *rmt-mutex*) +;; (http-transport:close-connections runremote) +;; (remote-server-url-set! runremote #f) +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") +;; (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) +;; +;; (define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) +;; (if (and (vector? res) +;; (eq? (vector-length res) 2) +;; (eq? (vector-ref res 1) 'overloaded)) ;; since we are +;; ;; looking at the +;; ;; data to carry the +;; ;; error we'll use a +;; ;; fairly obtuse +;; ;; combo to minimise +;; ;; the chances of +;; ;; some sort of +;; ;; collision. this +;; ;; is the case where +;; ;; the returned data +;; ;; is bad or the +;; ;; server is +;; ;; overloaded and we +;; ;; want to ease off +;; ;; the queries +;; (let ((wait-delay (+ attemptnum (* attemptnum 10)))) +;; (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") +;; (mutex-lock! *rmt-mutex*) +;; (http-transport:close-connections runremote) +;; (set! *runremote* #f) ;; force starting over +;; (mutex-unlock! *rmt-mutex*) +;; (thread-sleep! wait-delay) +;; (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) +;; res)) ;; All good, return res +;; +;; #;(set-functions rmt:send-receive remote-server-url-set! +;; http-transport:close-connections remote-conndat-set! +;; debug:print debug:print-info +;; remote-ro-mode remote-ro-mode-set! +;; remote-ro-mode-checked-set! remote-ro-mode-checked) +;; Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -18,68 +18,10 @@ ;;====================================================================== (declare (unit rmtmod)) (declare (uses commonmod)) -(declare (uses apimod)) -;; (declare (uses apimod.import)) -(declare (uses ulex)) - -;; (include "ulex/ulex.scm") (module rmtmod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) -(import (prefix commonmod cmod:)) -(import apimod) -(import (prefix ulex ulex:)) - -(defstruct alldat - (areapath #f) - (ulexdat #f) - ) - -;;====================================================================== -;; return the handle struct for sending queries to a specific database -;; - initializes the connection object if this is the first access -;; - finds the "captain" and asks who to talk to for the given dbfname -;; - establishes the connection to the current dbowner -;; -#;(define (rmt:connect alldat dbfname dbtype) - (let* ((ulexdat (or (alldat-ulexdat alldat) - (rmt:setup-ulex alldat)))) - (ulex:connect ulexdat dbfname dbtype))) - -;; setup the remote calls -#;(define (rmt:setup-ulex alldat) - (let* ((udata (ulex:setup))) ;; establish connection to ulex - (alldat-ulexdat-set! alldat udata) - ;; register all needed procs - (ulex:register-handler udata 'ping cmod:get-full-version) ;; override ping with get-full-version - (ulex:register-handler udata 'login cmod:get-full-version) ;; force setup of the connection - (ulex:register-handler udata 'execute api:execute-requests) - udata)) - -;; set up a connection to the current owner of the dbfile associated with rid -;; then send the query to that dbfile owner and wait for a response. -;; -#;(define (rmt:send-receive alldat cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - (let* (;; (alldat *alldat*) - (areapath (alldat-areapath alldat)) - (dbtype (if (or (not rid)(< rid 1)) ;; this is the criteria for "main.db" - "main" "runs")) - (dbfname (if (equal? dbtype "main") - "main.db" - (conc rid ".db"))) - (dbfile (conc areapath "/.db/" dbfname)) - (ulexconn (rmt:connect alldat dbfname dbtype)) ;; ulexconn is our new *runremote*, it is a dbowner struct < pdat lastrefresh > - (udata (alldat-ulexdat alldat))) - (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid params))) - ;; need to call this on the other side - ;; (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) - - #;(with-input-from-string - (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid (with-output-to-string (lambda ()(serialize params)))) - (lambda ()(deserialize))) ) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -25,11 +25,11 @@ (declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) -(declare (uses server)) +(declare (uses servermod)) (declare (uses mt)) (declare (uses archive)) ;; (declare (uses filedb)) (include "common_records.scm") Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -14,857 +14,858 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; -(require-extension (srfi 18) extras tcp s11n) - -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest - directory-utils posix-extras matchable utils) - -(use spiffy uri-common intarweb http-client spiffy-request-vars) - -(declare (unit server)) - -(declare (uses commonmod)) - -(declare (uses common)) -(declare (uses db)) -(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. -;; (declare (uses synchash)) -(declare (uses http-transport)) -;;(declare (uses rpc-transport)) -(declare (uses launch)) -;; (declare (uses daemon)) - -(import commonmod) - -(include "common_records.scm") -(include "db_records.scm") - -(define (server:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -;; Call this to start the actual server -;; - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;; Get the transport -(define (server:get-transport) - (if *transport-type* - *transport-type* - (let ((ttype (string->symbol - (or (args:get-arg "-transport") - (configf:lookup *configdat* "server" "transport") - "rpc")))) - (set! *transport-type* ttype) - ttype))) - -;; Generate a unique signature for this server -(define (server:mk-signature) - (message-digest-string (md5-primitive) - (with-output-to-string - (lambda () - (write (list (current-directory) - (current-process-id) - (argv))))))) - -(define (server:get-client-signature) - (if *my-client-signature* *my-client-signature* - (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic - (set! *my-client-signature* sig) - *my-client-signature*))) - -(define (server:get-server-id) - (if *server-id* *server-id* - (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic - (set! *server-id* sig) - *server-id*))) - -;; When using zmq this would send the message back (two step process) -;; with spiffy or rpc this simply returns the return data to be returned -;; -(define (server:reply return-addr query-sig success/fail result) - (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) - ;; (send-message pubsock target send-more: #t) - ;; (send-message pubsock - (case (server:get-transport) - ((rpc) (db:obj->string (vector success/fail query-sig result))) - ((http) (db:obj->string (vector success/fail query-sig result))) - ((fs) result) - (else - (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) - result))) - -;; Given an area path, start a server process ### NOTE ### > file 2>&1 -;; if the target-host is set -;; try running on that host -;; incidental: rotate logs in logs/ dir. -;; -(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((testsuite (common:get-testsuite-name)) - (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) - (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") - "")) - (cmdln (conc (common:get-megatest-exe) - " -server - ";; (or target-host "-") - (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") - ;; " -log " logfile - " -m testsuite:" testsuite - " " profile-mode - )) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? - (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) - ;; we want the remote server to start in *toppath* so push there - (push-directory areapath) - (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") - (thread-start! log-rotate) - - ;; host.domain.tld match host? - ;; (if (and target-host - ;; ;; look at target host, is it host.domain.tld or ip address and does it - ;; ;; match current ip or hostname - ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - ;; (not (equal? curr-ip target-host))) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - ;; (setenv "TARGETHOST" target-host))) - ;; - (setenv "TARGETHOST_LOGF" logfile) - (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time - (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) - (system (conc "nbfake " cmdln)) - (unsetenv "TARGETHOST_LOGF") - ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - (thread-join! log-rotate) - (pop-directory))) - -;; given a path to a server log return: host port startseconds server-id -;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let -;; example of what it's looking for in the log file: -;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 - -(define (server:logf-get-start-info logf) - (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id - (dbprep-rx (regexp "^SERVER: dbprep")) - (dbprep-found 0) - (bad-dat (list #f #f #f #f #f))) - (handle-exceptions - exn - (begin - ;; WARNING: this is potentially dangerous to blanket ignore the errors - (if (file-exists? logf) - (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) - bad-dat) ;; no idea what went wrong, call it a bad server - (with-input-from-file - logf - (lambda () - (let loop ((inl (read-line)) - (lnum 0)) - (if (not (eof-object? inl)) - (let ((mlst (string-match server-rx inl)) - (dbprep (string-match dbprep-rx inl))) - (if dbprep (set! dbprep-found 1)) - (if (not mlst) - (if (< lnum 500) ;; give up if more than 500 lines of server log read - (loop (read-line)(+ lnum 1)) - (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) - bad-dat)) - (match mlst - ((_ host port start server-id pid) - (list host - (string->number port) - (string->number start) - server-id - (string->number pid))) - (else - (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) - bad-dat)))) - (begin - (if dbprep-found - (begin - (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) - (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) - bad-dat)))))))) - -;; ;; get a list of servers from the log files, with all relevant data -;; ;; ( mod-time host port start-time pid ) -;; ;; -;; (define (server:get-list areapath #!key (limit #f)) -;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) -;; (day-seconds (* 24 60 60))) -;; ;; if the directory exists continue to get the list -;; ;; otherwise attempt to create the logs dir and then -;; ;; continue -;; (if (if (directory-exists? (conc areapath "/logs")) -;; '() -;; (if (file-write-access? areapath) -;; (begin -;; (condition-case -;; (create-directory (conc areapath "/logs") #t) -;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) -;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) -;; (directory-exists? (conc areapath "/logs"))) -;; '())) -;; -;; ;; Get the list of server logs. -;; (let* ( -;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. -;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) -;; (server-logs (glob (conc areapath "/logs/server-*-*.log"))) -;; (num-serv-logs (length server-logs))) -;; (if (or (null? server-logs) (= num-serv-logs 0)) -;; (let () -;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) -;; '() -;; ) -;; (let loop ((hed (string-chomp (car server-logs))) -;; (tal (cdr server-logs)) -;; (res '())) -;; (let* ((mod-time (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) -;; (current-seconds)) ;; 0 -;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted -;; (down-time (- (current-seconds) mod-time)) -;; (serv-dat (if (or (< num-serv-logs 10) -;; (< down-time 900)) ;; day-seconds)) -;; (server:logf-get-start-info hed) -;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at -;; (serv-rec (cons mod-time serv-dat)) -;; (fmatch (string-match fname-rx hed)) -;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) -;; (new-res (if (null? serv-dat) -;; res -;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let -;; (if (null? tal) -;; (if (and limit -;; (> (length new-res) limit)) -;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work -;; new-res) -;; (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) - -#;(define (server:get-num-alive srvlst) - (let ((num-alive 0)) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) - (match-let (((mod-time host port start-time server-id pid) - server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time - (- mod-time start-time) - 0))) - (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) - srvlst) - num-alive)) - -;; ;; given a list of servers get a list of valid servers, i.e. at least -;; ;; 10 seconds old, has started and is less than 1 hour old and is -;; ;; active (i.e. mod-time < 10 seconds -;; ;; -;; ;; mod-time host port start-time pid -;; ;; -;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off -;; ;; and servers should stick around for about two hours or so. -;; ;; -;; (define (server:get-best srvlst) -;; (let* ((nums (server:get-num-servers)) -;; (now (current-seconds)) -;; (slst (sort -;; (filter (lambda (rec) -;; (if (and (list? rec) -;; (> (length rec) 2)) -;; (let ((start-time (list-ref rec 3)) -;; (mod-time (list-ref rec 0))) -;; ;; (print "start-time: " start-time " mod-time: " mod-time) -;; (and start-time mod-time -;; (> (- now start-time) 0) ;; been running at least 0 seconds -;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds -;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set -;; (< (- now start-time) -;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) -;; 180) -;; (random 360)))) ;; under one hour running time +/- 180 -;; )) -;; #f)) -;; srvlst) -;; (lambda (a b) -;; (< (list-ref a 3) -;; (list-ref b 3)))))) -;; (if (> (length slst) nums) -;; (take slst nums) -;; slst))) - -;; ;; switch from server:get-list to server:get-servers-info -;; ;; -;; (define (server:get-first-best areapath) -;; (let ((srvrs (server:get-best (server:get-list areapath)))) -;; (if (and srvrs -;; (not (null? srvrs))) -;; (car srvrs) -;; #f))) -;; -;; (define (server:get-rand-best areapath) -;; (let ((srvrs (server:get-best (server:get-list areapath)))) -;; (if (and (list? srvrs) -;; (not (null? srvrs))) -;; (let* ((len (length srvrs)) -;; (idx (random len))) -;; (list-ref srvrs idx)) -;; #f))) - -(define (server:record->id servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) - #f) - (match-let (((host port start-time server-id pid) - servr)) - (if server-id - server-id - #f)))) - -(define (server:record->url servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) - #f) - (match-let (((host port start-time server-id pid) - servr)) - (if (and host port) - (conc host ":" port) - #f)))) - - -;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough. -;; if it is old enough, overwrite it and wait 0.25 seconds. -;; if it then has the wrong server key, wait + 1 and call this function recursively. -;; -#;(define (server:wait-for-server-start-last-flag areapath) - (let* ((start-flag (conc areapath "/logs/server-start-last")) - ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) - (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) - (server-key (conc (get-host-name) "-" (current-process-id)))) - (if (file-exists? start-flag) - (let* ((fmodtime (file-modification-time start-flag)) - (delta (- (current-seconds) fmodtime)) - (old-enough (> delta idletime)) - (new-server-key "")) - ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t. - ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process. - (if (and old-enough - (begin - (debug:print-info 2 *default-log-port* "Writing " start-flag) - (with-output-to-file start-flag (lambda () (print server-key))) - (thread-sleep! 0.25) - (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) - (equal? server-key new-server-key))) - #t - ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. - (begin - (debug:print-info 0 *default-log-port* "Gating server start, last start: " - (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) - - (thread-sleep! ( + 1 idletime)) - (server:wait-for-server-start-last-flag areapath))))))) - -;; oldest server alive determines host then choose random of youngest -;; five servers on that host -;; -(define (server:get-servers-info areapath) - ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") - (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) - (if (not (file-exists? servinfodir)) - (create-directory servinfodir)) - (let* ((allfiles (glob (conc servinfodir"/*"))) - (res (make-hash-table))) - (for-each - (lambda (f) - (let* ((hostport (pathname-strip-directory f)) - (serverdat (server:logf-get-start-info f))) - (match serverdat - ((host port start server-id pid) - (if (and host port start server-id pid) - (hash-table-set! res hostport serverdat) - (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) - (else - (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) - allfiles) - res))) - -;; check the .servinfo directory, are there other servers running on this -;; or another host? -;; -;; returns #t => ok to start another server -;; #f => not ok to start another server -;; -(define (server:minimal-check areapath) - (server:clean-up-old areapath) - (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) - (servrs (glob (conc srvdir"/*"))) - (thishostip (server:get-best-guess-address (get-host-name))) - (thisservrs (glob (conc srvdir"/"thishostip":*"))) - (homehostinf (server:choose-server areapath 'homehost)) - (havehome (car homehostinf)) - (wearehome (cdr homehostinf))) - (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome - ", numservers: "(length thisservrs)) - (cond - ((not havehome) #t) ;; no homehost yet, go for it - ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another - ((and havehome (not wearehome)) #f) ;; we are not the home host - ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running - (else - (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) - #t)))) - - -(define server-last-start 0) - - -;; oldest server alive determines host then choose random of youngest -;; five servers on that host -;; -;; mode: -;; best - get best server (random of newest five) -;; home - get home host based on oldest server -;; info - print info -(define (server:choose-server areapath #!optional (mode 'best)) - ;; age is current-starttime - ;; find oldest alive - ;; 1. sort by age ascending and ping until good - ;; find alive rand from youngest - ;; 1. sort by age descending - ;; 2. take five - ;; 3. check alive, discard if not and repeat - ;; first we clean up old server files - (server:clean-up-old areapath) - (let* ((since-last (- (current-seconds) server-last-start)) - (server-start-delay 10)) - (if ( < (- (current-seconds) server-last-start) 10 ) - (begin - (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) - (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") - (thread-sleep! server-start-delay) - ) - (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) - ) - ) - (let* ((serversdat (server:get-servers-info areapath)) - (servkeys (hash-table-keys serversdat)) - (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last - (sort servkeys ;; list of "host:port" - (lambda (a b) - (>= (list-ref (hash-table-ref serversdat a) 2) - (list-ref (hash-table-ref serversdat b) 2)))) - '()))) - (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) - (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) - (if (not (null? by-time-asc)) - (let* ((oldest (last by-time-asc)) - (oldest-dat (hash-table-ref serversdat oldest)) - (host (list-ref oldest-dat 0)) - (all-valid (filter (lambda (x) - (equal? host (list-ref (hash-table-ref serversdat x) 0))) - by-time-asc)) - (best-ten (lambda () - (if (> (length all-valid) 11) - (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out - (if (> (length all-valid) 8) - (drop-right all-valid 1) - all-valid)))) - (names->dats (lambda (names) - (map (lambda (x) - (hash-table-ref serversdat x)) - names))) - (am-home? (lambda () - (let* ((currhost (get-host-name)) - (bestadrs (server:get-best-guess-address currhost))) - (or (equal? host currhost) - (equal? host bestadrs)))))) - (case mode - ((info) - (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) - (print "youngest: "(hash-table-ref serversdat (car all-valid)))) - ((home) host) - ((homehost) (cons host (am-home?))) ;; shut up old code - ((home?) (am-home?)) - ((best-ten)(names->dats (best-ten))) - ((all-valid)(names->dats all-valid)) - ((best) (let* ((best-ten (best-ten)) - (len (length best-ten))) - (hash-table-ref serversdat (list-ref best-ten (random len))))) - ((count)(length all-valid)) - (else - (debug:print 0 *default-log-port* "ERROR: invalid command "mode) - #f))) - (begin - (server:run areapath) - (set! server-last-start (current-seconds)) - ;; (thread-sleep! 3) - (case mode - ((homehost) (cons #f #f)) - (else #f)))))) - -(define (server:get-servinfo-dir areapath) - (let* ((spath (conc areapath"/.servinfo"))) - (if (not (file-exists? spath)) - (create-directory spath #t)) - spath)) - -(define (server:clean-up-old areapath) - ;; any server file that has not been touched in ten minutes is effectively dead - (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) - (for-each - (lambda (sfile) - (let* ((modtime (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) - (current-seconds)) - (file-modification-time sfile)))) - (if (and (number? modtime) - (> (- (current-seconds) modtime) - 600)) - (begin - (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") - (handle-exceptions - exn - (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) - (delete-file sfile)))))) - sfiles))) - -;; would like to eventually get rid of this -;; -(define (common:on-homehost?) - (server:choose-server *toppath* 'home?)) - -;; kind start up of server, wait before allowing another server for a given -;; area to be launched -;; -(define (server:kind-run areapath) - ;; look for $MT_RUN_AREA_HOME/logs/server-start-last - ;; and wait for it to be at least seconds old - ;; (server:wait-for-server-start-last-flag areapath) - (let loop () - (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2) - (begin - (if (common:low-noise-print 30 "our-host-load") - (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server.")) - (loop)))) - (if (< (server:choose-server areapath 'count) 20) - (server:run areapath)) - #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? - (let* ((lock-file (conc areapath "/logs/server-start.lock"))) - (let* ((start-flag (conc areapath "/logs/server-start-last"))) - (common:simple-file-lock-and-wait lock-file expire-time: 25) - (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) - (system (conc "touch " start-flag)) ;; lazy but safe - (server:run areapath) - (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". - (common:simple-file-release-lock lock-file))) - (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another."))) - -;; this one seems to be the general entry point -;; -(define (server:start-and-wait areapath #!key (timeout 60)) - (let ((give-up-time (+ (current-seconds) timeout))) - (let loop ((server-info (server:check-if-running areapath)) - (try-num 0)) - (if (or server-info - (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. - (server:record->url server-info) - (let* ( (servers (server:choose-server areapath 'all-valid)) - (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0))) - (if (and (> try-num 0) ;; first time through simply wait a little while then try again - (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one - (server:run areapath)) - (thread-sleep! 5) - (loop (server:check-if-running areapath) - (+ try-num 1))))))) - -(define (server:get-num-servers #!key (numservers 2)) - (let ((ns (string->number - (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) - (or ns numservers))) - -;; no longer care if multiple servers are started by accident. older servers will drop off in time. -;; -(define (server:check-if-running areapath) ;; #!key (numservers "2")) - (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed - (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath)))) - (if (or (and servers - (null? servers)) - (not servers)) - ;; (and (list? servers) - ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers - #f - (let loop ((hed (car servers)) - (tal (cdr servers))) - (let ((res (server:check-server hed))) - (if res - hed - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - -;; ping the given server -;; -(define (server:check-server server-record) - (let* ((server-url (server:record->url server-record)) - (server-id (server:record->id server-record)) - (res (server:ping server-url server-id))) - (if res - server-url - #f))) - -(define (server:kill servr) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) - #f) - (match-let (((mod-time hostname port start-time server-id pid) - servr)) - (tasks:kill-server hostname pid)))) - -;; called in megatest.scm, host-port is string hostname:port -;; -;; NOTE: This is NOT called directly from clients as not all transports support a client running -;; in the same process as the server. -;; -(define (server:ping host:port server-id #!key (do-exit #f)) - (let* ((host-port (cond - ((string? host:port) - (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f))) - (else - #f)))) - (cond - ((and (list? host-port) - (eq? (length host-port) 2)) - (let* ((myrunremote (make-remote)) - (iface (car host-port)) - (port (cadr host-port)) - (server-dat (client:connect iface port server-id myrunremote)) - (login-res (rmt:login-no-auto-client-setup myrunremote))) - (if (and (list? login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f)))) - (else - (if host:port - (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) - (if do-exit - (exit 1) - #f))))) - -;; run ping in separate process, safest way in some cases -;; -(define (server:ping-server ifaceport) - (with-input-from-pipe - (conc (common:get-megatest-exe) " -ping " ifaceport) - (lambda () - (let loop ((inl (read-line)) - (res "NOREPLY")) - (if (eof-object? inl) - (case (string->symbol res) - ((NOREPLY) #f) - ((LOGIN_OK) #t) - (else #f)) - (loop (read-line) inl)))))) - -;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). -;; -(define (server:login toppath) - (lambda (toppath) - (set! *db-last-access* (current-seconds)) ;; might not be needed. - (if (equal? *toppath* toppath) - #t - #f))) - -;; timeout is hms string: 1h 5m 3s, default is 1 minute -;; This is currently broken. Just use the number of hours with no unit. -;; Default is 60 seconds. -;; -(define (server:expiration-timeout) - (let ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (and (string? tmo) - (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below - (* 3600 (string->number tmo)) - 600))) - -(define (server:get-best-guess-address hostname) - (let ((res #f)) - (for-each - (lambda (adr) - (if (not (eq? (u8vector-ref adr 0) 127)) - (set! res adr))) - ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME - (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) - (string-intersperse - (map number->string - (u8vector->list - (if res res (hostname->ip hostname)))) "."))) - -;; (define server:sync-lock-token "SERVER_SYNC_LOCK") -;; (define (server:release-sync-lock) -;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -;; (define (server:have-sync-lock?) -;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) -;; (have-lock? (car have-lock-pair)) -;; (lock-time (cdr have-lock-pair)) -;; (lock-age (- (current-seconds) lock-time))) -;; (cond -;; (have-lock? #t) -;; ((>lock-age -;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) -;; (server:release-sync-lock) -;; (server:have-sync-lock?)) -;; (else #f)))) - -;; moving this here as it needs access to db and cannot be in common. -;; - -(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) - (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") - (lambda () - (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")) - #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh - (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) - (tmp-area (common:get-db-tmp-area)) - (tmp-db (conc tmp-area "/megatest.db")) - (staging-file (conc *toppath* "/.megatest.db")) - (mtdbfile (conc *toppath* "/megatest.db")) - (lockfile (common:get-sync-lock-filepath)) - (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (sync-cmd (if fork-to-background - (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") - sync-cmd-core)) - (default-min-intersync-delay 2) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) - (default-duty-cycle 0.1) - (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) - (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) - (calculate-off-time (lambda (work-duration duty-cycle) - (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) - (off-time min-intersync-delay) ;; adjusted in closure below. - (do-a-sync - (lambda () - (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) - (let* ((finalres - (let retry-loop ((num-tries 0)) - (if (common:simple-file-lock lockfile) - (begin - (cond - ((not (or fork-to-background persist-until-sync)) - (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay - " , off-time="off-time" seconds ]") - (thread-sleep! (max off-time min-intersync-delay))) - (else - (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd)) - (dbbackupfile (conc mtdbfile ".backup")) - (res2 - (cond - ((eq? 0 res ) - (handle-exceptions - exn - #f - (if (file-exists? dbbackupfile) - (delete-file* dbbackupfile) - ) - (if (eq? 0 (file-size sync-log)) - (delete-file* sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - - (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) - (set! off-time (calculate-off-time - last-sync-seconds - (cond - ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) - duty-cycle) - (else - (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) - default-duty-cycle)))) - - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) - 'sync-completed)) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) - #f)))) - (common:simple-file-release-lock lockfile) - (BB> "released lockfile: " lockfile) - (when (common:file-exists? lockfile) - (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) - res2) ;; end let - );; end begin - ;; else - (cond - (persist-until-sync - (thread-sleep! 1) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") - (retry-loop (add1 num-tries))) - (else - (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - 'parallel-sync-in-progress)) - ) ;; end if got lockfile - ) - )) - (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) - finalres) - ) ;; end lambda - )) - do-a-sync)) - +;; (require-extension (srfi 18) extras tcp s11n) +;; +;; (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest +;; directory-utils posix-extras matchable utils) +;; +;; (use spiffy uri-common intarweb http-client spiffy-request-vars) +;; +;; (declare (unit server)) +;; +;; (declare (uses commonmod)) +;; +;; (declare (uses common)) +;; (declare (uses db)) +;; (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +;; ;; (declare (uses synchash)) +;; (declare (uses http-transport)) +;; ;;(declare (uses rpc-transport)) +;; (declare (uses launch)) +;; ;; (declare (uses daemon)) +;; +;; (import commonmod) +;; +;; (include "common_records.scm") +;; (include "db_records.scm") +;; +;; (define (server:make-server-url hostport) +;; (if (not hostport) +;; #f +;; (conc "http://" (car hostport) ":" (cadr hostport)))) +;; +;; (define *server-loop-heart-beat* (current-seconds)) +;; +;; ;;====================================================================== +;; ;; P K T S S T U F F +;; ;;====================================================================== +;; +;; ;; ??? +;; +;; ;;====================================================================== +;; ;; P K T S S T U F F +;; ;;====================================================================== +;; +;; ;; ??? +;; +;; ;;====================================================================== +;; ;; S E R V E R +;; ;;====================================================================== +;; +;; ;; Call this to start the actual server +;; ;; +;; +;; ;;====================================================================== +;; ;; S E R V E R U T I L I T I E S +;; ;;====================================================================== +;; +;; ;; Get the transport +;; (define (server:get-transport) +;; (if *transport-type* +;; *transport-type* +;; (let ((ttype (string->symbol +;; (or (args:get-arg "-transport") +;; (configf:lookup *configdat* "server" "transport") +;; "rpc")))) +;; (set! *transport-type* ttype) +;; ttype))) +;; +;; ;; Generate a unique signature for this server +;; (define (server:mk-signature) +;; (message-digest-string (md5-primitive) +;; (with-output-to-string +;; (lambda () +;; (write (list (current-directory) +;; (current-process-id) +;; (argv))))))) +;; +;; (define (server:get-client-signature) +;; (if *my-client-signature* *my-client-signature* +;; (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic +;; (set! *my-client-signature* sig) +;; *my-client-signature*))) +;; +;; (define (server:get-server-id) +;; (if *server-id* *server-id* +;; (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic +;; (set! *server-id* sig) +;; *server-id*))) +;; +;; ;; When using zmq this would send the message back (two step process) +;; ;; with spiffy or rpc this simply returns the return data to be returned +;; ;; +;; (define (server:reply return-addr query-sig success/fail result) +;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) +;; ;; (send-message pubsock target send-more: #t) +;; ;; (send-message pubsock +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) +;; +;; ;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; ;; if the target-host is set +;; ;; try running on that host +;; ;; incidental: rotate logs in logs/ dir. +;; ;; +;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area +;; (let* ((testsuite (common:get-testsuite-name)) +;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) +;; (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") +;; "")) +;; (cmdln (conc (common:get-megatest-exe) +;; " -server - ";; (or target-host "-") +;; (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") +;; " -daemonize " +;; "") +;; ;; " -log " logfile +;; " -m testsuite:" testsuite +;; " " profile-mode +;; )) ;; (conc " >> " logfile " 2>&1 &"))))) +;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? +;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) +;; ;; we want the remote server to start in *toppath* so push there +;; (push-directory areapath) +;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") +;; (thread-start! log-rotate) +;; +;; ;; host.domain.tld match host? +;; ;; (if (and target-host +;; ;; ;; look at target host, is it host.domain.tld or ip address and does it +;; ;; ;; match current ip or hostname +;; ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) +;; ;; (not (equal? curr-ip target-host))) +;; ;; (begin +;; ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) +;; ;; (setenv "TARGETHOST" target-host))) +;; ;; +;; (setenv "TARGETHOST_LOGF" logfile) +;; (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time +;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) +;; (system (conc "nbfake " cmdln)) +;; (unsetenv "TARGETHOST_LOGF") +;; ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) +;; (thread-join! log-rotate) +;; (pop-directory))) +;; +;; ;; given a path to a server log return: host port startseconds server-id +;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let +;; ;; example of what it's looking for in the log file: +;; ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 +;; +;; (define (server:logf-get-start-info logf) +;; (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id +;; (dbprep-rx (regexp "^SERVER: dbprep")) +;; (dbprep-found 0) +;; (bad-dat (list #f #f #f #f #f))) +;; (handle-exceptions +;; exn +;; (begin +;; ;; WARNING: this is potentially dangerous to blanket ignore the errors +;; (if (file-exists? logf) +;; (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) +;; bad-dat) ;; no idea what went wrong, call it a bad server +;; (with-input-from-file +;; logf +;; (lambda () +;; (let loop ((inl (read-line)) +;; (lnum 0)) +;; (if (not (eof-object? inl)) +;; (let ((mlst (string-match server-rx inl)) +;; (dbprep (string-match dbprep-rx inl))) +;; (if dbprep (set! dbprep-found 1)) +;; (if (not mlst) +;; (if (< lnum 500) ;; give up if more than 500 lines of server log read +;; (loop (read-line)(+ lnum 1)) +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) +;; bad-dat)) +;; (match mlst +;; ((_ host port start server-id pid) +;; (list host +;; (string->number port) +;; (string->number start) +;; server-id +;; (string->number pid))) +;; (else +;; (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) +;; bad-dat)))) +;; (begin +;; (if dbprep-found +;; (begin +;; (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) +;; (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? +;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) +;; bad-dat)))))))) +;; +;; ;; ;; get a list of servers from the log files, with all relevant data +;; ;; ;; ( mod-time host port start-time pid ) +;; ;; ;; +;; ;; (define (server:get-list areapath #!key (limit #f)) +;; ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) +;; ;; (day-seconds (* 24 60 60))) +;; ;; ;; if the directory exists continue to get the list +;; ;; ;; otherwise attempt to create the logs dir and then +;; ;; ;; continue +;; ;; (if (if (directory-exists? (conc areapath "/logs")) +;; ;; '() +;; ;; (if (file-write-access? areapath) +;; ;; (begin +;; ;; (condition-case +;; ;; (create-directory (conc areapath "/logs") #t) +;; ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) +;; ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) +;; ;; (directory-exists? (conc areapath "/logs"))) +;; ;; '())) +;; ;; +;; ;; ;; Get the list of server logs. +;; ;; (let* ( +;; ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. +;; ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) +;; ;; (server-logs (glob (conc areapath "/logs/server-*-*.log"))) +;; ;; (num-serv-logs (length server-logs))) +;; ;; (if (or (null? server-logs) (= num-serv-logs 0)) +;; ;; (let () +;; ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) +;; ;; '() +;; ;; ) +;; ;; (let loop ((hed (string-chomp (car server-logs))) +;; ;; (tal (cdr server-logs)) +;; ;; (res '())) +;; ;; (let* ((mod-time (handle-exceptions +;; ;; exn +;; ;; (begin +;; ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) +;; ;; (current-seconds)) ;; 0 +;; ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted +;; ;; (down-time (- (current-seconds) mod-time)) +;; ;; (serv-dat (if (or (< num-serv-logs 10) +;; ;; (< down-time 900)) ;; day-seconds)) +;; ;; (server:logf-get-start-info hed) +;; ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at +;; ;; (serv-rec (cons mod-time serv-dat)) +;; ;; (fmatch (string-match fname-rx hed)) +;; ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) +;; ;; (new-res (if (null? serv-dat) +;; ;; res +;; ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let +;; ;; (if (null? tal) +;; ;; (if (and limit +;; ;; (> (length new-res) limit)) +;; ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work +;; ;; new-res) +;; ;; (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) +;; +;; #;(define (server:get-num-alive srvlst) +;; (let ((num-alive 0)) +;; (for-each +;; (lambda (server) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) +;; (match-let (((mod-time host port start-time server-id pid) +;; server)) +;; (let* ((uptime (- (current-seconds) mod-time)) +;; (runtime (if start-time +;; (- mod-time start-time) +;; 0))) +;; (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) +;; srvlst) +;; num-alive)) +;; +;; ;; ;; given a list of servers get a list of valid servers, i.e. at least +;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is +;; ;; ;; active (i.e. mod-time < 10 seconds +;; ;; ;; +;; ;; ;; mod-time host port start-time pid +;; ;; ;; +;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off +;; ;; ;; and servers should stick around for about two hours or so. +;; ;; ;; +;; ;; (define (server:get-best srvlst) +;; ;; (let* ((nums (server:get-num-servers)) +;; ;; (now (current-seconds)) +;; ;; (slst (sort +;; ;; (filter (lambda (rec) +;; ;; (if (and (list? rec) +;; ;; (> (length rec) 2)) +;; ;; (let ((start-time (list-ref rec 3)) +;; ;; (mod-time (list-ref rec 0))) +;; ;; ;; (print "start-time: " start-time " mod-time: " mod-time) +;; ;; (and start-time mod-time +;; ;; (> (- now start-time) 0) ;; been running at least 0 seconds +;; ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds +;; ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set +;; ;; (< (- now start-time) +;; ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) +;; ;; 180) +;; ;; (random 360)))) ;; under one hour running time +/- 180 +;; ;; )) +;; ;; #f)) +;; ;; srvlst) +;; ;; (lambda (a b) +;; ;; (< (list-ref a 3) +;; ;; (list-ref b 3)))))) +;; ;; (if (> (length slst) nums) +;; ;; (take slst nums) +;; ;; slst))) +;; +;; ;; ;; switch from server:get-list to server:get-servers-info +;; ;; ;; +;; ;; (define (server:get-first-best areapath) +;; ;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; ;; (if (and srvrs +;; ;; (not (null? srvrs))) +;; ;; (car srvrs) +;; ;; #f))) +;; ;; +;; ;; (define (server:get-rand-best areapath) +;; ;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; ;; (if (and (list? srvrs) +;; ;; (not (null? srvrs))) +;; ;; (let* ((len (length srvrs)) +;; ;; (idx (random len))) +;; ;; (list-ref srvrs idx)) +;; ;; #f))) +;; +;; (define (server:record->id servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) +;; #f) +;; (match-let (((host port start-time server-id pid) +;; servr)) +;; (if server-id +;; server-id +;; #f)))) +;; +;; (define (server:record->url servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) +;; #f) +;; (match-let (((host port start-time server-id pid) +;; servr)) +;; (if (and host port) +;; (conc host ":" port) +;; #f)))) +;; +;; +;; ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough. +;; ;; if it is old enough, overwrite it and wait 0.25 seconds. +;; ;; if it then has the wrong server key, wait + 1 and call this function recursively. +;; ;; +;; #;(define (server:wait-for-server-start-last-flag areapath) +;; (let* ((start-flag (conc areapath "/logs/server-start-last")) +;; ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) +;; (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) +;; (server-key (conc (get-host-name) "-" (current-process-id)))) +;; (if (file-exists? start-flag) +;; (let* ((fmodtime (file-modification-time start-flag)) +;; (delta (- (current-seconds) fmodtime)) +;; (old-enough (> delta idletime)) +;; (new-server-key "")) +;; ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t. +;; ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process. +;; (if (and old-enough +;; (begin +;; (debug:print-info 2 *default-log-port* "Writing " start-flag) +;; (with-output-to-file start-flag (lambda () (print server-key))) +;; (thread-sleep! 0.25) +;; (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) +;; (equal? server-key new-server-key))) +;; #t +;; ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. +;; (begin +;; (debug:print-info 0 *default-log-port* "Gating server start, last start: " +;; (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) +;; +;; (thread-sleep! ( + 1 idletime)) +;; (server:wait-for-server-start-last-flag areapath))))))) +;; +;; ;; oldest server alive determines host then choose random of youngest +;; ;; five servers on that host +;; ;; +;; (define (server:get-servers-info areapath) +;; ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") +;; (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) +;; (if (not (file-exists? servinfodir)) +;; (create-directory servinfodir)) +;; (let* ((allfiles (glob (conc servinfodir"/*"))) +;; (res (make-hash-table))) +;; (for-each +;; (lambda (f) +;; (let* ((hostport (pathname-strip-directory f)) +;; (serverdat (server:logf-get-start-info f))) +;; (match serverdat +;; ((host port start server-id pid) +;; (if (and host port start server-id pid) +;; (hash-table-set! res hostport serverdat) +;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) +;; (else +;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) +;; allfiles) +;; res))) +;; +;; ;; check the .servinfo directory, are there other servers running on this +;; ;; or another host? +;; ;; +;; ;; returns #t => ok to start another server +;; ;; #f => not ok to start another server +;; ;; +;; (define (server:minimal-check areapath) +;; (server:clean-up-old areapath) +;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) +;; (servrs (glob (conc srvdir"/*"))) +;; (thishostip (server:get-best-guess-address (get-host-name))) +;; (thisservrs (glob (conc srvdir"/"thishostip":*"))) +;; (homehostinf (server:choose-server areapath 'homehost)) +;; (havehome (car homehostinf)) +;; (wearehome (cdr homehostinf))) +;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome +;; ", numservers: "(length thisservrs)) +;; (cond +;; ((not havehome) #t) ;; no homehost yet, go for it +;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another +;; ((and havehome (not wearehome)) #f) ;; we are not the home host +;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running +;; (else +;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) +;; #t)))) +;; +;; +;; (define server-last-start 0) +;; +;; +;; ;; oldest server alive determines host then choose random of youngest +;; ;; five servers on that host +;; ;; +;; ;; mode: +;; ;; best - get best server (random of newest five) +;; ;; home - get home host based on oldest server +;; ;; info - print info +;; (define (server:choose-server areapath #!optional (mode 'best)) +;; ;; age is current-starttime +;; ;; find oldest alive +;; ;; 1. sort by age ascending and ping until good +;; ;; find alive rand from youngest +;; ;; 1. sort by age descending +;; ;; 2. take five +;; ;; 3. check alive, discard if not and repeat +;; ;; first we clean up old server files +;; (server:clean-up-old areapath) +;; (let* ((since-last (- (current-seconds) server-last-start)) +;; (server-start-delay 10)) +;; (if ( < (- (current-seconds) server-last-start) 10 ) +;; (begin +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") +;; (thread-sleep! server-start-delay) +;; ) +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; ) +;; ) +;; (let* ((serversdat (server:get-servers-info areapath)) +;; (servkeys (hash-table-keys serversdat)) +;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last +;; (sort servkeys ;; list of "host:port" +;; (lambda (a b) +;; (>= (list-ref (hash-table-ref serversdat a) 2) +;; (list-ref (hash-table-ref serversdat b) 2)))) +;; '()))) +;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) +;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) +;; (if (not (null? by-time-asc)) +;; (let* ((oldest (last by-time-asc)) +;; (oldest-dat (hash-table-ref serversdat oldest)) +;; (host (list-ref oldest-dat 0)) +;; (all-valid (filter (lambda (x) +;; (equal? host (list-ref (hash-table-ref serversdat x) 0))) +;; by-time-asc)) +;; (best-ten (lambda () +;; (if (> (length all-valid) 11) +;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out +;; (if (> (length all-valid) 8) +;; (drop-right all-valid 1) +;; all-valid)))) +;; (names->dats (lambda (names) +;; (map (lambda (x) +;; (hash-table-ref serversdat x)) +;; names))) +;; (am-home? (lambda () +;; (let* ((currhost (get-host-name)) +;; (bestadrs (server:get-best-guess-address currhost))) +;; (or (equal? host currhost) +;; (equal? host bestadrs)))))) +;; (case mode +;; ((info) +;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) +;; (print "youngest: "(hash-table-ref serversdat (car all-valid)))) +;; ((home) host) +;; ((homehost) (cons host (am-home?))) ;; shut up old code +;; ((home?) (am-home?)) +;; ((best-ten)(names->dats (best-ten))) +;; ((all-valid)(names->dats all-valid)) +;; ((best) (let* ((best-ten (best-ten)) +;; (len (length best-ten))) +;; (hash-table-ref serversdat (list-ref best-ten (random len))))) +;; ((count)(length all-valid)) +;; (else +;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode) +;; #f))) +;; (begin +;; (server:run areapath) +;; (set! server-last-start (current-seconds)) +;; ;; (thread-sleep! 3) +;; (case mode +;; ((homehost) (cons #f #f)) +;; (else #f)))))) +;; +;; (define (server:get-servinfo-dir areapath) +;; (let* ((spath (conc areapath"/.servinfo"))) +;; (if (not (file-exists? spath)) +;; (create-directory spath #t)) +;; spath)) +;; +;; (define (server:clean-up-old areapath) +;; ;; any server file that has not been touched in ten minutes is effectively dead +;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) +;; (for-each +;; (lambda (sfile) +;; (let* ((modtime (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) +;; (current-seconds)) +;; (file-modification-time sfile)))) +;; (if (and (number? modtime) +;; (> (- (current-seconds) modtime) +;; 600)) +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) +;; (delete-file sfile)))))) +;; sfiles))) +;; +;; ;; would like to eventually get rid of this +;; ;; +;; (define (common:on-homehost?) +;; (server:choose-server *toppath* 'home?)) +;; +;; ;; kind start up of server, wait before allowing another server for a given +;; ;; area to be launched +;; ;; +;; (define (server:kind-run areapath) +;; ;; look for $MT_RUN_AREA_HOME/logs/server-start-last +;; ;; and wait for it to be at least seconds old +;; ;; (server:wait-for-server-start-last-flag areapath) +;; (let loop () +;; (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2) +;; (begin +;; (if (common:low-noise-print 30 "our-host-load") +;; (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server.")) +;; (loop)))) +;; (if (< (server:choose-server areapath 'count) 20) +;; (server:run areapath)) +;; #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? +;; (let* ((lock-file (conc areapath "/logs/server-start.lock"))) +;; (let* ((start-flag (conc areapath "/logs/server-start-last"))) +;; (common:simple-file-lock-and-wait lock-file expire-time: 25) +;; (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) +;; (system (conc "touch " start-flag)) ;; lazy but safe +;; (server:run areapath) +;; (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". +;; (common:simple-file-release-lock lock-file))) +;; (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another."))) +;; +;; ;; this one seems to be the general entry point +;; ;; +;; (define (server:start-and-wait areapath #!key (timeout 60)) +;; (let ((give-up-time (+ (current-seconds) timeout))) +;; (let loop ((server-info (server:check-if-running areapath)) +;; (try-num 0)) +;; (if (or server-info +;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. +;; (server:record->url server-info) +;; (let* ( (servers (server:choose-server areapath 'all-valid)) +;; (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0))) +;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again +;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one +;; (server:run areapath)) +;; (thread-sleep! 5) +;; (loop (server:check-if-running areapath) +;; (+ try-num 1))))))) +;; +;; (define (server:get-num-servers #!key (numservers 2)) +;; (let ((ns (string->number +;; (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) +;; (or ns numservers))) +;; +;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time. +;; ;; +;; (define (server:check-if-running areapath) ;; #!key (numservers "2")) +;; (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed +;; (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath)))) +;; (if (or (and servers +;; (null? servers)) +;; (not servers)) +;; ;; (and (list? servers) +;; ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers +;; #f +;; (let loop ((hed (car servers)) +;; (tal (cdr servers))) +;; (let ((res (server:check-server hed))) +;; (if res +;; hed +;; (if (null? tal) +;; #f +;; (loop (car tal)(cdr tal))))))))) +;; +;; ;; ping the given server +;; ;; +;; (define (server:check-server server-record) +;; (let* ((server-url (server:record->url server-record)) +;; (server-id (server:record->id server-record)) +;; (res (server:ping server-url server-id))) +;; (if res +;; server-url +;; #f))) +;; +;; (define (server:kill servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) +;; #f) +;; (match-let (((mod-time hostname port start-time server-id pid) +;; servr)) +;; (tasks:kill-server hostname pid)))) +;; +;; ;; called in megatest.scm, host-port is string hostname:port +;; ;; +;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running +;; ;; in the same process as the server. +;; ;; +;; (define (server:ping host:port server-id #!key (do-exit #f)) +;; (let* ((host-port (cond +;; ((string? host:port) +;; (let ((slst (string-split host:port ":"))) +;; (if (eq? (length slst) 2) +;; (list (car slst)(string->number (cadr slst))) +;; #f))) +;; (else +;; #f)))) +;; (cond +;; ((and (list? host-port) +;; (eq? (length host-port) 2)) +;; (let* ((myrunremote (make-remote)) +;; (iface (car host-port)) +;; (port (cadr host-port)) +;; (server-dat (client:connect iface port server-id myrunremote)) +;; (login-res (rmt:login-no-auto-client-setup myrunremote))) +;; (if (and (list? login-res) +;; (car login-res)) +;; (begin +;; ;; (print "LOGIN_OK") +;; (if do-exit (exit 0)) +;; #t) +;; (begin +;; ;; (print "LOGIN_FAILED") +;; (if do-exit (exit 1)) +;; #f)))) +;; (else +;; (if host:port +;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) +;; (if do-exit +;; (exit 1) +;; #f))))) +;; +;; ;; run ping in separate process, safest way in some cases +;; ;; +;; (define (server:ping-server ifaceport) +;; (with-input-from-pipe +;; (conc (common:get-megatest-exe) " -ping " ifaceport) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res "NOREPLY")) +;; (if (eof-object? inl) +;; (case (string->symbol res) +;; ((NOREPLY) #f) +;; ((LOGIN_OK) #t) +;; (else #f)) +;; (loop (read-line) inl)))))) +;; +;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; ;; +;; (define (server:login toppath) +;; (lambda (toppath) +;; (set! *db-last-access* (current-seconds)) ;; might not be needed. +;; (if (equal? *toppath* toppath) +;; #t +;; #f))) +;; +;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute +;; ;; This is currently broken. Just use the number of hours with no unit. +;; ;; Default is 60 seconds. +;; ;; +;; (define (server:expiration-timeout) +;; (let ((tmo (configf:lookup *configdat* "server" "timeout"))) +;; (if (and (string? tmo) +;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below +;; (* 3600 (string->number tmo)) +;; 600))) +;; +;; (define (server:get-best-guess-address hostname) +;; (let ((res #f)) +;; (for-each +;; (lambda (adr) +;; (if (not (eq? (u8vector-ref adr 0) 127)) +;; (set! res adr))) +;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME +;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) +;; (string-intersperse +;; (map number->string +;; (u8vector->list +;; (if res res (hostname->ip hostname)))) "."))) +;; +;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; ;; (define (server:release-sync-lock) +;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; ;; (define (server:have-sync-lock?) +;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; ;; (have-lock? (car have-lock-pair)) +;; ;; (lock-time (cdr have-lock-pair)) +;; ;; (lock-age (- (current-seconds) lock-time))) +;; ;; (cond +;; ;; (have-lock? #t) +;; ;; ((>lock-age +;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; ;; (server:release-sync-lock) +;; ;; (server:have-sync-lock?)) +;; ;; (else #f)))) +;; +;; ;; moving this here as it needs access to db and cannot be in common. +;; ;; +;; +;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) +;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") +;; (lambda () +;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")) +;; #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh +;; (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) +;; (tmp-area (common:get-db-tmp-area)) +;; (tmp-db (conc tmp-area "/megatest.db")) +;; (staging-file (conc *toppath* "/.megatest.db")) +;; (mtdbfile (conc *toppath* "/megatest.db")) +;; (lockfile (common:get-sync-lock-filepath)) +;; (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) +;; (sync-cmd (if fork-to-background +;; (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") +;; sync-cmd-core)) +;; (default-min-intersync-delay 2) +;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) +;; (default-duty-cycle 0.1) +;; (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) +;; (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) +;; (calculate-off-time (lambda (work-duration duty-cycle) +;; (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) +;; (off-time min-intersync-delay) ;; adjusted in closure below. +;; (do-a-sync +;; (lambda () +;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) +;; (let* ((finalres +;; (let retry-loop ((num-tries 0)) +;; (if (common:simple-file-lock lockfile) +;; (begin +;; (cond +;; ((not (or fork-to-background persist-until-sync)) +;; (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay +;; " , off-time="off-time" seconds ]") +;; (thread-sleep! (max off-time min-intersync-delay))) +;; (else +;; (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) +;; +;; (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) +;; (common:snapshot-file mtdbfile subdir: ".db-snapshot")) +;; (delete-file* staging-file) +;; (let* ((start-time (current-milliseconds)) +;; (res (system sync-cmd)) +;; (dbbackupfile (conc mtdbfile ".backup")) +;; (res2 +;; (cond +;; ((eq? 0 res ) +;; (handle-exceptions +;; exn +;; #f +;; (if (file-exists? dbbackupfile) +;; (delete-file* dbbackupfile) +;; ) +;; (if (eq? 0 (file-size sync-log)) +;; (delete-file* sync-log)) +;; (system (conc "/bin/mv " staging-file " " mtdbfile)) +;; +;; (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) +;; (set! off-time (calculate-off-time +;; last-sync-seconds +;; (cond +;; ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) +;; duty-cycle) +;; (else +;; (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) +;; default-duty-cycle)))) +;; +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) +;; 'sync-completed)) +;; (else +;; (system (conc "/bin/cp "sync-log" "sync-log".fail")) +;; (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") +;; (if (file-exists? (conc mtdbfile ".backup")) +;; (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) +;; #f)))) +;; (common:simple-file-release-lock lockfile) +;; (BB> "released lockfile: " lockfile) +;; (when (common:file-exists? lockfile) +;; (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) +;; res2) ;; end let +;; );; end begin +;; ;; else +;; (cond +;; (persist-until-sync +;; (thread-sleep! 1) +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") +;; (retry-loop (add1 num-tries))) +;; (else +;; (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") +;; 'parallel-sync-in-progress)) +;; ) ;; end if got lockfile +;; ) +;; )) +;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) +;; finalres) +;; ) ;; end lambda +;; )) +;; do-a-sync)) +;; +;; ADDED servermod.scm Index: servermod.scm ================================================================== --- /dev/null +++ servermod.scm @@ -0,0 +1,821 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +(declare (unit servermod)) + +(module servermod +* + +(import scheme + chicken + + md5 + message-digest + ports + posix + ) + +(define *client-server-id* #f) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; Generate a unique signature for this server +(define (mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (current-process-id) + (argv))))))) + +(define (get-client-server-id) + (if *client-server-id* *client-server-id* + (let ((sig (mk-signature))) ;; clients re-use the server:mk-signature logic + (set! *client-server-id* sig) + *client-server-id*))) + +;; ;; When using zmq this would send the message back (two step process) +;; ;; with spiffy or rpc this simply returns the return data to be returned +;; ;; +;; (define (server:reply return-addr query-sig success/fail result) +;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) +;; ;; (send-message pubsock target send-more: #t) +;; ;; (send-message pubsock +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) +;; +;; ;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; ;; if the target-host is set +;; ;; try running on that host +;; ;; incidental: rotate logs in logs/ dir. +;; ;; +;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area +;; (let* ((testsuite (common:get-testsuite-name)) +;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) +;; (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") +;; "")) +;; (cmdln (conc (common:get-megatest-exe) +;; " -server - ";; (or target-host "-") +;; (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") +;; " -daemonize " +;; "") +;; ;; " -log " logfile +;; " -m testsuite:" testsuite +;; " " profile-mode +;; )) ;; (conc " >> " logfile " 2>&1 &"))))) +;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? +;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) +;; ;; we want the remote server to start in *toppath* so push there +;; (push-directory areapath) +;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") +;; (thread-start! log-rotate) +;; +;; ;; host.domain.tld match host? +;; ;; (if (and target-host +;; ;; ;; look at target host, is it host.domain.tld or ip address and does it +;; ;; ;; match current ip or hostname +;; ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) +;; ;; (not (equal? curr-ip target-host))) +;; ;; (begin +;; ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) +;; ;; (setenv "TARGETHOST" target-host))) +;; ;; +;; (setenv "TARGETHOST_LOGF" logfile) +;; (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time +;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) +;; (system (conc "nbfake " cmdln)) +;; (unsetenv "TARGETHOST_LOGF") +;; ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) +;; (thread-join! log-rotate) +;; (pop-directory))) +;; +;; ;; given a path to a server log return: host port startseconds server-id +;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let +;; ;; example of what it's looking for in the log file: +;; ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 +;; +;; (define (server:logf-get-start-info logf) +;; (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id +;; (dbprep-rx (regexp "^SERVER: dbprep")) +;; (dbprep-found 0) +;; (bad-dat (list #f #f #f #f #f))) +;; (handle-exceptions +;; exn +;; (begin +;; ;; WARNING: this is potentially dangerous to blanket ignore the errors +;; (if (file-exists? logf) +;; (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) +;; bad-dat) ;; no idea what went wrong, call it a bad server +;; (with-input-from-file +;; logf +;; (lambda () +;; (let loop ((inl (read-line)) +;; (lnum 0)) +;; (if (not (eof-object? inl)) +;; (let ((mlst (string-match server-rx inl)) +;; (dbprep (string-match dbprep-rx inl))) +;; (if dbprep (set! dbprep-found 1)) +;; (if (not mlst) +;; (if (< lnum 500) ;; give up if more than 500 lines of server log read +;; (loop (read-line)(+ lnum 1)) +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) +;; bad-dat)) +;; (match mlst +;; ((_ host port start server-id pid) +;; (list host +;; (string->number port) +;; (string->number start) +;; server-id +;; (string->number pid))) +;; (else +;; (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) +;; bad-dat)))) +;; (begin +;; (if dbprep-found +;; (begin +;; (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) +;; (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? +;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) +;; bad-dat)))))))) +;; +;; ;; ;; get a list of servers from the log files, with all relevant data +;; ;; ;; ( mod-time host port start-time pid ) +;; ;; ;; +;; ;; (define (server:get-list areapath #!key (limit #f)) +;; ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) +;; ;; (day-seconds (* 24 60 60))) +;; ;; ;; if the directory exists continue to get the list +;; ;; ;; otherwise attempt to create the logs dir and then +;; ;; ;; continue +;; ;; (if (if (directory-exists? (conc areapath "/logs")) +;; ;; '() +;; ;; (if (file-write-access? areapath) +;; ;; (begin +;; ;; (condition-case +;; ;; (create-directory (conc areapath "/logs") #t) +;; ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) +;; ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) +;; ;; (directory-exists? (conc areapath "/logs"))) +;; ;; '())) +;; ;; +;; ;; ;; Get the list of server logs. +;; ;; (let* ( +;; ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. +;; ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) +;; ;; (server-logs (glob (conc areapath "/logs/server-*-*.log"))) +;; ;; (num-serv-logs (length server-logs))) +;; ;; (if (or (null? server-logs) (= num-serv-logs 0)) +;; ;; (let () +;; ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) +;; ;; '() +;; ;; ) +;; ;; (let loop ((hed (string-chomp (car server-logs))) +;; ;; (tal (cdr server-logs)) +;; ;; (res '())) +;; ;; (let* ((mod-time (handle-exceptions +;; ;; exn +;; ;; (begin +;; ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) +;; ;; (current-seconds)) ;; 0 +;; ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted +;; ;; (down-time (- (current-seconds) mod-time)) +;; ;; (serv-dat (if (or (< num-serv-logs 10) +;; ;; (< down-time 900)) ;; day-seconds)) +;; ;; (server:logf-get-start-info hed) +;; ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at +;; ;; (serv-rec (cons mod-time serv-dat)) +;; ;; (fmatch (string-match fname-rx hed)) +;; ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) +;; ;; (new-res (if (null? serv-dat) +;; ;; res +;; ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let +;; ;; (if (null? tal) +;; ;; (if (and limit +;; ;; (> (length new-res) limit)) +;; ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work +;; ;; new-res) +;; ;; (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) +;; +;; #;(define (server:get-num-alive srvlst) +;; (let ((num-alive 0)) +;; (for-each +;; (lambda (server) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) +;; (match-let (((mod-time host port start-time server-id pid) +;; server)) +;; (let* ((uptime (- (current-seconds) mod-time)) +;; (runtime (if start-time +;; (- mod-time start-time) +;; 0))) +;; (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) +;; srvlst) +;; num-alive)) +;; +;; ;; ;; given a list of servers get a list of valid servers, i.e. at least +;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is +;; ;; ;; active (i.e. mod-time < 10 seconds +;; ;; ;; +;; ;; ;; mod-time host port start-time pid +;; ;; ;; +;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off +;; ;; ;; and servers should stick around for about two hours or so. +;; ;; ;; +;; ;; (define (server:get-best srvlst) +;; ;; (let* ((nums (server:get-num-servers)) +;; ;; (now (current-seconds)) +;; ;; (slst (sort +;; ;; (filter (lambda (rec) +;; ;; (if (and (list? rec) +;; ;; (> (length rec) 2)) +;; ;; (let ((start-time (list-ref rec 3)) +;; ;; (mod-time (list-ref rec 0))) +;; ;; ;; (print "start-time: " start-time " mod-time: " mod-time) +;; ;; (and start-time mod-time +;; ;; (> (- now start-time) 0) ;; been running at least 0 seconds +;; ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds +;; ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set +;; ;; (< (- now start-time) +;; ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) +;; ;; 180) +;; ;; (random 360)))) ;; under one hour running time +/- 180 +;; ;; )) +;; ;; #f)) +;; ;; srvlst) +;; ;; (lambda (a b) +;; ;; (< (list-ref a 3) +;; ;; (list-ref b 3)))))) +;; ;; (if (> (length slst) nums) +;; ;; (take slst nums) +;; ;; slst))) +;; +;; ;; ;; switch from server:get-list to server:get-servers-info +;; ;; ;; +;; ;; (define (server:get-first-best areapath) +;; ;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; ;; (if (and srvrs +;; ;; (not (null? srvrs))) +;; ;; (car srvrs) +;; ;; #f))) +;; ;; +;; ;; (define (server:get-rand-best areapath) +;; ;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; ;; (if (and (list? srvrs) +;; ;; (not (null? srvrs))) +;; ;; (let* ((len (length srvrs)) +;; ;; (idx (random len))) +;; ;; (list-ref srvrs idx)) +;; ;; #f))) +;; +;; (define (server:record->id servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) +;; #f) +;; (match-let (((host port start-time server-id pid) +;; servr)) +;; (if server-id +;; server-id +;; #f)))) +;; +;; (define (server:record->url servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) +;; #f) +;; (match-let (((host port start-time server-id pid) +;; servr)) +;; (if (and host port) +;; (conc host ":" port) +;; #f)))) +;; +;; +;; ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough. +;; ;; if it is old enough, overwrite it and wait 0.25 seconds. +;; ;; if it then has the wrong server key, wait + 1 and call this function recursively. +;; ;; +;; #;(define (server:wait-for-server-start-last-flag areapath) +;; (let* ((start-flag (conc areapath "/logs/server-start-last")) +;; ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) +;; (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) +;; (server-key (conc (get-host-name) "-" (current-process-id)))) +;; (if (file-exists? start-flag) +;; (let* ((fmodtime (file-modification-time start-flag)) +;; (delta (- (current-seconds) fmodtime)) +;; (old-enough (> delta idletime)) +;; (new-server-key "")) +;; ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t. +;; ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process. +;; (if (and old-enough +;; (begin +;; (debug:print-info 2 *default-log-port* "Writing " start-flag) +;; (with-output-to-file start-flag (lambda () (print server-key))) +;; (thread-sleep! 0.25) +;; (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) +;; (equal? server-key new-server-key))) +;; #t +;; ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. +;; (begin +;; (debug:print-info 0 *default-log-port* "Gating server start, last start: " +;; (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) +;; +;; (thread-sleep! ( + 1 idletime)) +;; (server:wait-for-server-start-last-flag areapath))))))) +;; +;; ;; oldest server alive determines host then choose random of youngest +;; ;; five servers on that host +;; ;; +;; (define (server:get-servers-info areapath) +;; ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") +;; (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) +;; (if (not (file-exists? servinfodir)) +;; (create-directory servinfodir)) +;; (let* ((allfiles (glob (conc servinfodir"/*"))) +;; (res (make-hash-table))) +;; (for-each +;; (lambda (f) +;; (let* ((hostport (pathname-strip-directory f)) +;; (serverdat (server:logf-get-start-info f))) +;; (match serverdat +;; ((host port start server-id pid) +;; (if (and host port start server-id pid) +;; (hash-table-set! res hostport serverdat) +;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) +;; (else +;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) +;; allfiles) +;; res))) +;; +;; ;; check the .servinfo directory, are there other servers running on this +;; ;; or another host? +;; ;; +;; ;; returns #t => ok to start another server +;; ;; #f => not ok to start another server +;; ;; +;; (define (server:minimal-check areapath) +;; (server:clean-up-old areapath) +;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) +;; (servrs (glob (conc srvdir"/*"))) +;; (thishostip (server:get-best-guess-address (get-host-name))) +;; (thisservrs (glob (conc srvdir"/"thishostip":*"))) +;; (homehostinf (server:choose-server areapath 'homehost)) +;; (havehome (car homehostinf)) +;; (wearehome (cdr homehostinf))) +;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome +;; ", numservers: "(length thisservrs)) +;; (cond +;; ((not havehome) #t) ;; no homehost yet, go for it +;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another +;; ((and havehome (not wearehome)) #f) ;; we are not the home host +;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running +;; (else +;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) +;; #t)))) +;; +;; +;; (define server-last-start 0) +;; +;; +;; ;; oldest server alive determines host then choose random of youngest +;; ;; five servers on that host +;; ;; +;; ;; mode: +;; ;; best - get best server (random of newest five) +;; ;; home - get home host based on oldest server +;; ;; info - print info +;; (define (server:choose-server areapath #!optional (mode 'best)) +;; ;; age is current-starttime +;; ;; find oldest alive +;; ;; 1. sort by age ascending and ping until good +;; ;; find alive rand from youngest +;; ;; 1. sort by age descending +;; ;; 2. take five +;; ;; 3. check alive, discard if not and repeat +;; ;; first we clean up old server files +;; (server:clean-up-old areapath) +;; (let* ((since-last (- (current-seconds) server-last-start)) +;; (server-start-delay 10)) +;; (if ( < (- (current-seconds) server-last-start) 10 ) +;; (begin +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") +;; (thread-sleep! server-start-delay) +;; ) +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; ) +;; ) +;; (let* ((serversdat (server:get-servers-info areapath)) +;; (servkeys (hash-table-keys serversdat)) +;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last +;; (sort servkeys ;; list of "host:port" +;; (lambda (a b) +;; (>= (list-ref (hash-table-ref serversdat a) 2) +;; (list-ref (hash-table-ref serversdat b) 2)))) +;; '()))) +;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) +;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) +;; (if (not (null? by-time-asc)) +;; (let* ((oldest (last by-time-asc)) +;; (oldest-dat (hash-table-ref serversdat oldest)) +;; (host (list-ref oldest-dat 0)) +;; (all-valid (filter (lambda (x) +;; (equal? host (list-ref (hash-table-ref serversdat x) 0))) +;; by-time-asc)) +;; (best-ten (lambda () +;; (if (> (length all-valid) 11) +;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out +;; (if (> (length all-valid) 8) +;; (drop-right all-valid 1) +;; all-valid)))) +;; (names->dats (lambda (names) +;; (map (lambda (x) +;; (hash-table-ref serversdat x)) +;; names))) +;; (am-home? (lambda () +;; (let* ((currhost (get-host-name)) +;; (bestadrs (server:get-best-guess-address currhost))) +;; (or (equal? host currhost) +;; (equal? host bestadrs)))))) +;; (case mode +;; ((info) +;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) +;; (print "youngest: "(hash-table-ref serversdat (car all-valid)))) +;; ((home) host) +;; ((homehost) (cons host (am-home?))) ;; shut up old code +;; ((home?) (am-home?)) +;; ((best-ten)(names->dats (best-ten))) +;; ((all-valid)(names->dats all-valid)) +;; ((best) (let* ((best-ten (best-ten)) +;; (len (length best-ten))) +;; (hash-table-ref serversdat (list-ref best-ten (random len))))) +;; ((count)(length all-valid)) +;; (else +;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode) +;; #f))) +;; (begin +;; (server:run areapath) +;; (set! server-last-start (current-seconds)) +;; ;; (thread-sleep! 3) +;; (case mode +;; ((homehost) (cons #f #f)) +;; (else #f)))))) +;; +;; (define (server:get-servinfo-dir areapath) +;; (let* ((spath (conc areapath"/.servinfo"))) +;; (if (not (file-exists? spath)) +;; (create-directory spath #t)) +;; spath)) +;; +;; (define (server:clean-up-old areapath) +;; ;; any server file that has not been touched in ten minutes is effectively dead +;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) +;; (for-each +;; (lambda (sfile) +;; (let* ((modtime (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) +;; (current-seconds)) +;; (file-modification-time sfile)))) +;; (if (and (number? modtime) +;; (> (- (current-seconds) modtime) +;; 600)) +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) +;; (delete-file sfile)))))) +;; sfiles))) +;; +;; ;; would like to eventually get rid of this +;; ;; +;; (define (common:on-homehost?) +;; (server:choose-server *toppath* 'home?)) +;; +;; ;; kind start up of server, wait before allowing another server for a given +;; ;; area to be launched +;; ;; +;; (define (server:kind-run areapath) +;; ;; look for $MT_RUN_AREA_HOME/logs/server-start-last +;; ;; and wait for it to be at least seconds old +;; ;; (server:wait-for-server-start-last-flag areapath) +;; (let loop () +;; (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2) +;; (begin +;; (if (common:low-noise-print 30 "our-host-load") +;; (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server.")) +;; (loop)))) +;; (if (< (server:choose-server areapath 'count) 20) +;; (server:run areapath)) +;; #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? +;; (let* ((lock-file (conc areapath "/logs/server-start.lock"))) +;; (let* ((start-flag (conc areapath "/logs/server-start-last"))) +;; (common:simple-file-lock-and-wait lock-file expire-time: 25) +;; (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) +;; (system (conc "touch " start-flag)) ;; lazy but safe +;; (server:run areapath) +;; (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". +;; (common:simple-file-release-lock lock-file))) +;; (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another."))) +;; +;; ;; this one seems to be the general entry point +;; ;; +;; (define (server:start-and-wait areapath #!key (timeout 60)) +;; (let ((give-up-time (+ (current-seconds) timeout))) +;; (let loop ((server-info (server:check-if-running areapath)) +;; (try-num 0)) +;; (if (or server-info +;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. +;; (server:record->url server-info) +;; (let* ( (servers (server:choose-server areapath 'all-valid)) +;; (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0))) +;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again +;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one +;; (server:run areapath)) +;; (thread-sleep! 5) +;; (loop (server:check-if-running areapath) +;; (+ try-num 1))))))) +;; +;; (define (server:get-num-servers #!key (numservers 2)) +;; (let ((ns (string->number +;; (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) +;; (or ns numservers))) +;; +;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time. +;; ;; +;; (define (server:check-if-running areapath) ;; #!key (numservers "2")) +;; (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed +;; (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath)))) +;; (if (or (and servers +;; (null? servers)) +;; (not servers)) +;; ;; (and (list? servers) +;; ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers +;; #f +;; (let loop ((hed (car servers)) +;; (tal (cdr servers))) +;; (let ((res (server:check-server hed))) +;; (if res +;; hed +;; (if (null? tal) +;; #f +;; (loop (car tal)(cdr tal))))))))) +;; +;; ;; ping the given server +;; ;; +;; (define (server:check-server server-record) +;; (let* ((server-url (server:record->url server-record)) +;; (server-id (server:record->id server-record)) +;; (res (server:ping server-url server-id))) +;; (if res +;; server-url +;; #f))) +;; +;; (define (server:kill servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) +;; #f) +;; (match-let (((mod-time hostname port start-time server-id pid) +;; servr)) +;; (tasks:kill-server hostname pid)))) +;; +;; ;; called in megatest.scm, host-port is string hostname:port +;; ;; +;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running +;; ;; in the same process as the server. +;; ;; +;; (define (server:ping host:port server-id #!key (do-exit #f)) +;; (let* ((host-port (cond +;; ((string? host:port) +;; (let ((slst (string-split host:port ":"))) +;; (if (eq? (length slst) 2) +;; (list (car slst)(string->number (cadr slst))) +;; #f))) +;; (else +;; #f)))) +;; (cond +;; ((and (list? host-port) +;; (eq? (length host-port) 2)) +;; (let* ((myrunremote (make-remote)) +;; (iface (car host-port)) +;; (port (cadr host-port)) +;; (server-dat (client:connect iface port server-id myrunremote)) +;; (login-res (rmt:login-no-auto-client-setup myrunremote))) +;; (if (and (list? login-res) +;; (car login-res)) +;; (begin +;; ;; (print "LOGIN_OK") +;; (if do-exit (exit 0)) +;; #t) +;; (begin +;; ;; (print "LOGIN_FAILED") +;; (if do-exit (exit 1)) +;; #f)))) +;; (else +;; (if host:port +;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) +;; (if do-exit +;; (exit 1) +;; #f))))) +;; +;; ;; run ping in separate process, safest way in some cases +;; ;; +;; (define (server:ping-server ifaceport) +;; (with-input-from-pipe +;; (conc (common:get-megatest-exe) " -ping " ifaceport) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res "NOREPLY")) +;; (if (eof-object? inl) +;; (case (string->symbol res) +;; ((NOREPLY) #f) +;; ((LOGIN_OK) #t) +;; (else #f)) +;; (loop (read-line) inl)))))) +;; +;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; ;; +;; (define (server:login toppath) +;; (lambda (toppath) +;; (set! *db-last-access* (current-seconds)) ;; might not be needed. +;; (if (equal? *toppath* toppath) +;; #t +;; #f))) +;; +;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute +;; ;; This is currently broken. Just use the number of hours with no unit. +;; ;; Default is 60 seconds. +;; ;; +;; (define (server:expiration-timeout) +;; (let ((tmo (configf:lookup *configdat* "server" "timeout"))) +;; (if (and (string? tmo) +;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below +;; (* 3600 (string->number tmo)) +;; 600))) +;; +;; (define (server:get-best-guess-address hostname) +;; (let ((res #f)) +;; (for-each +;; (lambda (adr) +;; (if (not (eq? (u8vector-ref adr 0) 127)) +;; (set! res adr))) +;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME +;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) +;; (string-intersperse +;; (map number->string +;; (u8vector->list +;; (if res res (hostname->ip hostname)))) "."))) +;; +;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; ;; (define (server:release-sync-lock) +;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; ;; (define (server:have-sync-lock?) +;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; ;; (have-lock? (car have-lock-pair)) +;; ;; (lock-time (cdr have-lock-pair)) +;; ;; (lock-age (- (current-seconds) lock-time))) +;; ;; (cond +;; ;; (have-lock? #t) +;; ;; ((>lock-age +;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; ;; (server:release-sync-lock) +;; ;; (server:have-sync-lock?)) +;; ;; (else #f)))) +;; +;; ;; moving this here as it needs access to db and cannot be in common. +;; ;; +;; +;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) +;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") +;; (lambda () +;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")) +;; #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh +;; (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) +;; (tmp-area (common:get-db-tmp-area)) +;; (tmp-db (conc tmp-area "/megatest.db")) +;; (staging-file (conc *toppath* "/.megatest.db")) +;; (mtdbfile (conc *toppath* "/megatest.db")) +;; (lockfile (common:get-sync-lock-filepath)) +;; (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) +;; (sync-cmd (if fork-to-background +;; (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") +;; sync-cmd-core)) +;; (default-min-intersync-delay 2) +;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) +;; (default-duty-cycle 0.1) +;; (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) +;; (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) +;; (calculate-off-time (lambda (work-duration duty-cycle) +;; (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) +;; (off-time min-intersync-delay) ;; adjusted in closure below. +;; (do-a-sync +;; (lambda () +;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) +;; (let* ((finalres +;; (let retry-loop ((num-tries 0)) +;; (if (common:simple-file-lock lockfile) +;; (begin +;; (cond +;; ((not (or fork-to-background persist-until-sync)) +;; (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay +;; " , off-time="off-time" seconds ]") +;; (thread-sleep! (max off-time min-intersync-delay))) +;; (else +;; (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) +;; +;; (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) +;; (common:snapshot-file mtdbfile subdir: ".db-snapshot")) +;; (delete-file* staging-file) +;; (let* ((start-time (current-milliseconds)) +;; (res (system sync-cmd)) +;; (dbbackupfile (conc mtdbfile ".backup")) +;; (res2 +;; (cond +;; ((eq? 0 res ) +;; (handle-exceptions +;; exn +;; #f +;; (if (file-exists? dbbackupfile) +;; (delete-file* dbbackupfile) +;; ) +;; (if (eq? 0 (file-size sync-log)) +;; (delete-file* sync-log)) +;; (system (conc "/bin/mv " staging-file " " mtdbfile)) +;; +;; (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) +;; (set! off-time (calculate-off-time +;; last-sync-seconds +;; (cond +;; ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) +;; duty-cycle) +;; (else +;; (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) +;; default-duty-cycle)))) +;; +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) +;; 'sync-completed)) +;; (else +;; (system (conc "/bin/cp "sync-log" "sync-log".fail")) +;; (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") +;; (if (file-exists? (conc mtdbfile ".backup")) +;; (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) +;; #f)))) +;; (common:simple-file-release-lock lockfile) +;; (BB> "released lockfile: " lockfile) +;; (when (common:file-exists? lockfile) +;; (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) +;; res2) ;; end let +;; );; end begin +;; ;; else +;; (cond +;; (persist-until-sync +;; (thread-sleep! 1) +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") +;; (retry-loop (add1 num-tries))) +;; (else +;; (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") +;; 'parallel-sync-in-progress)) +;; ) ;; end if got lockfile +;; ) +;; )) +;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) +;; finalres) +;; ) ;; end lambda +;; )) +;; do-a-sync)) +;; +;; + +) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -22,11 +22,11 @@ (import (prefix sqlite3 sqlite3:)) (declare (unit tasks)) (declare (uses dbfile)) (declare (uses db)) -(declare (uses rmt)) +(declare (uses rmtmod)) (declare (uses common)) (declare (uses pgdb)) (import dbfile) ;; (import pgdb) ;; pgdb is a module Index: tcmt.scm ================================================================== --- tcmt.scm +++ tcmt.scm @@ -27,11 +27,11 @@ (use trace) ;; (trace-call-sites #t) (declare (uses margs)) -(declare (uses rmt)) +(declare (uses rmtmod)) (declare (uses common)) ;; (declare (uses megatest-version)) (include "megatest-version.scm") (include "megatest-fossil-hash.scm") Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -29,11 +29,11 @@ (declare (unit tdb)) (declare (uses common)) (declare (uses keys)) (declare (uses ods)) -(declare (uses client)) +(declare (uses clientmod)) (declare (uses mt)) (declare (uses db)) (include "common_records.scm") (include "db_records.scm") Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -30,11 +30,11 @@ (declare (uses commonmod)) ;; (declare (uses dcommon)) ;; needed for the steps processing (declare (uses items)) (declare (uses runconfig)) ;; (declare (uses sdb)) -(declare (uses server)) +(declare (uses servermod)) ;;(declare (uses stml2)) (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils) (import (prefix sqlite3 sqlite3:)) (import commonmod) Index: tree.scm ================================================================== --- tree.scm +++ tree.scm @@ -30,11 +30,11 @@ (declare (uses margs)) (declare (uses launch)) ;; (declare (uses megatest-version)) (declare (uses gutils)) (declare (uses db)) -(declare (uses server)) +;; (declare (uses server)) ;; (declare (uses synchash)) (declare (uses dcommon)) (include "megatest-version.scm") (include "common_records.scm")