Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -20,21 +20,23 @@
SHELL=/bin/bash
PREFIX=$(PWD)
CSCOPTS=
INSTALL=install
SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm \
- server.scm configf.scm db.scm keys.scm margs.scm \
- process.scm runs.scm tasks.scm tests.scm genexample.scm \
- http-transport.scm tdb.scm client.scm mt.scm \
- ezsteps.scm lock-queue.scm rmt.scm api.scm \
- subrun.scm portlogger.scm archive.scm env.scm \
- diff-report.scm cgisetup/models/pgdb.scm
+ configf.scm db.scm keys.scm margs.scm process.scm runs.scm \
+ tasks.scm tests.scm genexample.scm tdb.scm mt.scm \
+ ezsteps.scm lock-queue.scm api.scm subrun.scm \
+ portlogger.scm archive.scm env.scm diff-report.scm \
+ cgisetup/models/pgdb.scm
+
+# server.scm http-transport.scm client.scm rmt.scm
# module source files
-MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm
+MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
+ servermod.scm clientmod.scm rmtmod.scm
-all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
+all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut
# dbmod.import.o is just a hack here
mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o
@@ -258,10 +260,22 @@
$(PREFIX)/bin/mtexec : $(PREFIX)/bin/.$(ARCHSTR)/mtexec utils/mk_wrapper
utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec
chmod a+x $(PREFIX)/bin/mtexec
+# mtserv
+
+mtserv: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtserv.scm
+ csc $(CSCOPTS) $(OFILES) $(MOFILES) mtserv.scm -o mtserv
+
+$(PREFIX)/bin/.$(ARCHSTR)/mtserv : mtserv
+ $(INSTALL) mtserv $(PREFIX)/bin/.$(ARCHSTR)/mtserv
+
+$(PREFIX)/bin/mtserv : $(PREFIX)/bin/.$(ARCHSTR)/mtserv utils/mk_wrapper
+ utils/mk_wrapper $(PREFIX) mtserv $(PREFIX)/bin/mtserv
+ chmod a+x $(PREFIX)/bin/mtserv
+
# tcmt
$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
$(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -21,18 +21,19 @@
;;======================================================================
(use srfi-69 posix)
(declare (unit api))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses tasks))
(import dbmod)
(import dbfile)
+(import rmtmod)
;; allow these queries through without starting a server
;;
(define api:read-only-queries
'(get-key-val-pairs
Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -28,135 +28,136 @@
(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-(module client
-*
-
-)
-
-(import client)
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-;; client:get-signature
-(define (client:get-signature)
- (if *my-client-signature* *my-client-signature*
- (let ((sig (conc (get-host-name) " " (current-process-id))))
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
-;; Not currently used! But, I think it *should* be used!!!
-#;(define (client:logout serverdat)
- (let ((ok (and (socket? serverdat)
- (cdb:logout serverdat *toppath* (client:get-signature)))))
- ok))
-
-;; Do all the connection work, look up the transport type and set up the
-;; connection if required.
-;;
-;; There are two scenarios.
-;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline
-;; 2. We are a run tests, list runs or other interactive process and we must figure out
-;; *transport-type* and *runremote* from the monitor.db
-;;
-;; client:setup
-;;
-;; lookup_server, need to remove *runremote* stuff
-;;
-
-;;(define (http-transport:server-dat-make-url runremote)
-(define (client:get-url runremote)
- (if (and (remote-iface runremote)
- (remote-port runremote))
- (conc "http://"
- (remote-iface runremote)
- ":"
- (remote-port runremote))
- #f))
-
-(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
- (mutex-lock! *rmt-mutex*)
- (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
- (mutex-unlock! *rmt-mutex*)
- res))
-
-(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0))
- (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
- (server:start-and-wait areapath)
- (if (<= remaining-tries 0)
- (begin
- (debug:print-error 0 *default-log-port* "failed to start or connect to server")
- (exit 1))
- ;;
- ;; Alternatively here, we can get the list of candidate servers and work our way
- ;; through them searching for a good one.
- ;;
- (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid
-;; (runremote (or area-dat *runremote*)))
- (if (not server-dat) ;; no server found
- (begin
- (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
- (match server-dat
- ((host port start-time server-id pid)
- (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
- (if (not runremote)
- (begin
- ;; Here we are creating a runremote where there was none or it was clobbered with #f
- ;;
- (set! runremote (make-remote))
- (let* ((server-info (server:check-if-running areapath)))
- (remote-server-info-set! runremote server-info)
- (if server-info
- (begin
- (remote-server-url-set! runremote (server:record->url server-info))
- (remote-server-id-set! runremote (server:record->id server-info)))))))
- ;; at this point we have a runremote
- (if (and host port server-id)
- (let* ((nada (client:connect host port server-id runremote))
- (ping-res (rmt:login-no-auto-client-setup runremote)))
- (if ping-res
- (if runremote
- (begin
- (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote))
- runremote)
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
- (begin ;; login failed but have a server record, clean out the record and try again
- (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
- (http-transport:close-connections runremote)
- (thread-sleep! 1)
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))
- )))
- (begin ;; no server registered
- ;; (server:kind-run areapath)
- (server:start-and-wait areapath)
- (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
- (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))))
- (else
- (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))
-
-;;
-;; connect - stored in remote-condat
-;;
-;; (define (http-transport:client-connect iface port server-id runremote)
-(define (client:connect iface port server-id runremote-in)
- (let* ((runremote (or runremote-in
- (make-runremote))))
- (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
- (let* ((api-url (conc "http://" iface ":" port "/api"))
- (api-uri (uri-reference (conc "http://" iface ":" port "/api")))
- (api-req (make-request method: 'POST uri: api-uri)))
- ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
- (remote-iface-set! runremote iface)
- (remote-port-set! runremote port)
- (remote-server-id-set! runremote server-id)
- (remote-connect-time-set! runremote (current-seconds))
- (remote-last-access-set! runremote (current-seconds))
- (remote-api-url-set! runremote api-url)
- (remote-api-uri-set! runremote api-uri)
- (remote-api-req-set! runremote api-req)
- runremote)))
-
+;; (module client
+;; *
+;;
+;; )
+;;
+;; (import client)
+;;
+;; (include "common_records.scm")
+;; (include "db_records.scm")
+;;
+;; ;; client:get-signature
+;; (define (client:get-signature)
+;; (if *my-client-signature* *my-client-signature*
+;; (let ((sig (conc (get-host-name) " " (current-process-id))))
+;; (set! *my-client-signature* sig)
+;; *my-client-signature*)))
+;;
+;; ;; Not currently used! But, I think it *should* be used!!!
+;; #;(define (client:logout serverdat)
+;; (let ((ok (and (socket? serverdat)
+;; (cdb:logout serverdat *toppath* (client:get-signature)))))
+;; ok))
+;;
+;; ;; Do all the connection work, look up the transport type and set up the
+;; ;; connection if required.
+;; ;;
+;; ;; There are two scenarios.
+;; ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline
+;; ;; 2. We are a run tests, list runs or other interactive process and we must figure out
+;; ;; *transport-type* and *runremote* from the monitor.db
+;; ;;
+;; ;; client:setup
+;; ;;
+;; ;; lookup_server, need to remove *runremote* stuff
+;; ;;
+;;
+;; ;;(define (http-transport:server-dat-make-url runremote)
+;; (define (client:get-url runremote)
+;; (if (and (remote-iface runremote)
+;; (remote-port runremote))
+;; (conc "http://"
+;; (remote-iface runremote)
+;; ":"
+;; (remote-port runremote))
+;; #f))
+;;
+;; (define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
+;; (mutex-lock! *rmt-mutex*)
+;; (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
+;; (mutex-unlock! *rmt-mutex*)
+;; res))
+;;
+;; (define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0))
+;; (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
+;; (server:start-and-wait areapath)
+;; (if (<= remaining-tries 0)
+;; (begin
+;; (debug:print-error 0 *default-log-port* "failed to start or connect to server")
+;; (exit 1))
+;; ;;
+;; ;; Alternatively here, we can get the list of candidate servers and work our way
+;; ;; through them searching for a good one.
+;; ;;
+;; (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid
+;; ;; (runremote (or area-dat *runremote*)))
+;; (if (not server-dat) ;; no server found
+;; (begin
+;; (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
+;; (match server-dat
+;; ((host port start-time server-id pid)
+;; (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
+;; (if (not runremote)
+;; (begin
+;; ;; Here we are creating a runremote where there was none or it was clobbered with #f
+;; ;;
+;; (set! runremote (make-remote))
+;; (let* ((server-info (server:check-if-running areapath)))
+;; (remote-server-info-set! runremote server-info)
+;; (if server-info
+;; (begin
+;; (remote-server-url-set! runremote (server:record->url server-info))
+;; (remote-server-id-set! runremote (server:record->id server-info)))))))
+;; ;; at this point we have a runremote
+;; (if (and host port server-id)
+;; (let* ((nada (client:connect host port server-id runremote))
+;; (ping-res (rmt:login-no-auto-client-setup runremote)))
+;; (if ping-res
+;; (if runremote
+;; (begin
+;; (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote))
+;; runremote)
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
+;; (begin ;; login failed but have a server record, clean out the record and try again
+;; (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
+;; (http-transport:close-connections runremote)
+;; (thread-sleep! 1)
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))
+;; )))
+;; (begin ;; no server registered
+;; ;; (server:kind-run areapath)
+;; (server:start-and-wait areapath)
+;; (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
+;; (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))))
+;; (else
+;; (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))
+;;
+;; ;;
+;; ;; connect - stored in remote-condat
+;; ;;
+;; ;; (define (http-transport:client-connect iface port server-id runremote)
+;; (define (client:connect iface port server-id runremote-in)
+;; (let* ((runremote (or runremote-in
+;; (make-runremote))))
+;; (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
+;; (let* ((api-url (conc "http://" iface ":" port "/api"))
+;; (api-uri (uri-reference (conc "http://" iface ":" port "/api")))
+;; (api-req (make-request method: 'POST uri: api-uri)))
+;; ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
+;; (remote-iface-set! runremote iface)
+;; (remote-port-set! runremote port)
+;; (remote-server-id-set! runremote server-id)
+;; (remote-connect-time-set! runremote (current-seconds))
+;; (remote-last-access-set! runremote (current-seconds))
+;; (remote-api-url-set! runremote api-url)
+;; (remote-api-uri-set! runremote api-uri)
+;; (remote-api-req-set! runremote api-req)
+;; runremote)))
+;;
+;;
ADDED clientmod.scm
Index: clientmod.scm
==================================================================
--- /dev/null
+++ clientmod.scm
@@ -0,0 +1,33 @@
+
+;; Copyright 2006-2012, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+;; C L I E N T S
+;;======================================================================
+
+;;(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5
+;; message-digest matchable spiffy uri-common intarweb http-client
+;; spiffy-request-vars uri-common intarweb directory-utils)
+
+(declare (unit clientmod))
+
+(module clientmod
+*
+
+)
+
Index: dashboard-context-menu.scm
==================================================================
--- dashboard-context-menu.scm
+++ dashboard-context-menu.scm
@@ -34,11 +34,11 @@
(declare (unit dashboard-context-menu))
(declare (uses common))
(declare (uses db))
(declare (uses gutils))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))
Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -33,11 +33,11 @@
(declare (unit dashboard-tests))
(declare (uses common))
(declare (uses db))
(declare (uses gutils))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -51,11 +51,11 @@
(declare (uses dbmod))
;; (declare (uses debugprint))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
-(declare (uses client))
+;; (declare (uses client))
(declare (uses mt))
(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
Index: diff-report.scm
==================================================================
--- diff-report.scm
+++ diff-report.scm
@@ -16,11 +16,11 @@
;; along with Megatest. If not, see .
;;
(declare (unit diff-report))
(declare (uses common))
-(declare (uses rmt))
+(declare (uses rmtmod))
(include "common_records.scm")
(use matchable)
(use fmt)
(use ducttape-lib)
Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -1,6 +1,5 @@
-
;; Copyright 2006-2012, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
@@ -14,680 +13,681 @@
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
-(require-extension (srfi 18) extras tcp s11n)
-
-
-(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras)
-
-(use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
-
-;; Configurations for server
-(tcp-buffer-size 2048)
-(max-connections 2048)
-
+;; (require-extension (srfi 18) extras tcp s11n)
+;;
+;;
+;; (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras)
+;;
+;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
+;;
+;; ;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048)
+;;
(declare (unit http-transport))
-
-(declare (uses common))
-(declare (uses db))
-(declare (uses tests))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-(declare (uses server))
-;; (declare (uses daemon))
-(declare (uses portlogger))
-(declare (uses rmt))
-(declare (uses dbfile))
-(declare (uses commonmod))
-
-(include "common_records.scm")
-(include "db_records.scm")
-(include "js-path.scm")
-
-(import dbfile commonmod)
-
-(require-library stml)
-(define (http-transport:make-server-url hostport)
- (if (not hostport)
- #f
- (conc "http://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-
-;;======================================================================
-;; S E R V E R
-;; ======================================================================
-
-;; Call this to start the actual server
-;;
-
-(define *db:process-queue-mutex* (make-mutex))
-
-(define (http-transport:run hostn)
- ;; Configurations for server
- (tcp-buffer-size 2048)
- (max-connections 2048)
- (debug:print 2 *default-log-port* "Attempting to start the server ...")
- (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily
- (hostname (get-host-name))
- (ipaddrstr (let ((ipstr (if (string=? "-" hostn)
- ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
- (server:get-best-guess-address hostname)
- #f)))
- (if ipstr ipstr hostn))) ;; hostname)))
- (start-port (portlogger:open-run-close portlogger:find-port))
- (link-tree-path (common:get-linktree))
- (tmp-area (common:get-db-tmp-area))
- (start-file (conc tmp-area "/.server-start")))
- (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
- ;; set some parameters for the server
- (root-path (if link-tree-path
- link-tree-path
- (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
- (handle-directory spiffy-directory-listing)
- (handle-exception (lambda (exn chain)
- (signal (make-composite-condition
- (make-property-condition
- 'server
- 'message "server error")))))
-
- ;; http-transport:handle-directory) ;; simple-directory-handler)
- ;; Setup the web server and a /ctrl interface
- ;;
- (vhost-map `(((* any) . ,(lambda (continue)
- ;; open the db on the first call
- ;; This is were we set up the database connections
- (let* (($ (request-vars source: 'both))
- (dat ($ 'dat))
- (res #f))
- (cond
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "api"))
- (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc
- headers: '((content-type text/plain)))
- (mutex-lock! *heartbeat-mutex*)
- (set! *db-last-access* (current-seconds))
- (mutex-unlock! *heartbeat-mutex*))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ ""))
- (send-response body: (http-transport:main-page)))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "json_api"))
- (send-response body: (http-transport:main-page)))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "runs"))
- (send-response body: (http-transport:main-page)))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ any))
- (send-response body: "hey there!\n"
- headers: '((content-type text/plain))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "hey"))
- (send-response body: "hey there!\n"
- headers: '((content-type text/plain))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "jquery3.1.0.js"))
- (send-response body: (http-transport:show-jquery)
- headers: '((content-type application/javascript))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "test_log"))
- (send-response body: (http-transport:html-test-log $)
- headers: '((content-type text/HTML))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "dashboard"))
- (send-response body: (http-transport:html-dboard $)
- headers: '((content-type text/HTML))))
- (else (continue))))))))
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn)
- (with-output-to-file start-file (lambda ()(print (current-process-id)))))
- (http-transport:try-start-server ipaddrstr start-port)))
-
-;; This is recursively run by http-transport:run until sucessful
-;;
-(define (http-transport:try-start-server ipaddrstr portnum)
- (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
- (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
- (if (not config-use-proxy)
- (determine-proxy (constantly #f)))
- (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
- (handle-exceptions
- exn
- (begin
- ;; (print-error-message exn)
- (if (< portnum 64000)
- (begin
- (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
- (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
- (debug:print 5 *default-log-port* "exn=" (condition->list exn))
- (portlogger:open-run-close portlogger:set-failed portnum)
- (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
- (thread-sleep! 0.1)
-
- ;; get_next_port goes here
- (http-transport:try-start-server ipaddrstr
- (portlogger:open-run-close portlogger:find-port)))
- (begin
- (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server"))))
- ;; any error in following steps will result in a retry
- (set! *server-info* (list ipaddrstr portnum))
- (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
- ;; This starts the spiffy server
- ;; NEED WAY TO SET IP TO #f TO BIND ALL
- ;; (start-server bind-address: ipaddrstr port: portnum)
- (if config-hostname ;; this is a hint to bind directly
- (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-")
- ;; ipaddrstr
- ;; config-hostname))
- (start-server port: portnum))
- (portlogger:open-run-close portlogger:set-port portnum "released")
- (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
-
-;;======================================================================
-;; S E R V E R U T I L I T I E S
-;;======================================================================
-
-;;======================================================================
-;; C L I E N T S
-;;======================================================================
-
-(define *http-mutex* (make-mutex))
-
-;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
-;; I'm pretty sure it is defunct.
-
-;; This next block all imported en-mass from the api branch
-(define *http-requests-in-progress* 0)
-(define *http-connections-next-cleanup* (current-seconds))
-
-(define (http-transport:get-time-to-cleanup)
- (let ((res #f))
- (mutex-lock! *http-mutex*)
- (set! res (> (current-seconds) *http-connections-next-cleanup*))
- (mutex-unlock! *http-mutex*)
- res))
-
-(define (http-transport:inc-requests-count)
- (mutex-lock! *http-mutex*)
- (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))
- ;; Use this opportunity to slow things down iff there are too many requests in flight
- (if (> *http-requests-in-progress* 5)
- (begin
- (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
- (thread-sleep! 1)))
- (mutex-unlock! *http-mutex*))
-
-(define (http-transport:dec-requests-count proc)
- (mutex-lock! *http-mutex*)
- (proc)
- (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
- (mutex-unlock! *http-mutex*))
-
-(define (http-transport:dec-requests-count-and-close-all-connections)
- (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
- (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
- (if (> *http-requests-in-progress* 0)
- (if (> etime (current-seconds))
- (begin
- (thread-sleep! 0.05)
- (loop etime))
- (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
- (close-all-connections!)))
- (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
- (mutex-unlock! *http-mutex*))
-
-(define (http-transport:inc-requests-and-prep-to-close-all-connections)
- (mutex-lock! *http-mutex*)
- (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))
-
-;; Send "cmd" with json payload "params" to serverdat and receive result
-;;
-(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3))
- (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat)
- (let* ((fullurl (remote-api-req runremote))
- (res (vector #f "uninitialized"))
- (success #t)
- (sparams (db:obj->string params transport: 'http))
- (server-id (remote-server-id runremote)))
- (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds))
-
- ;; set up the http-client here
- (max-retry-attempts 1)
- ;; consider all requests indempotent
- (retry-request? (lambda (request)
- #f))
- ;; send the data and get the response
- ;; extract the needed info from the http data and
- ;; process and return it.
- (let* ((send-recieve (lambda ()
- (mutex-lock! *http-mutex*)
- ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
- ;; ((exn http client-error) e (print e)))
- (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
- success
- (db:string->obj
- (handle-exceptions
- exn
- (let ((call-chain (get-call-chain))
- (msg ((condition-property-accessor 'exn 'message) exn)))
- (set! success #f)
- (if (debug:debug-mode 1)
- (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
- (begin
- (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
- (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
- (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
- (debug:print 0 *default-log-port* " call-chain: " call-chain)))
- ;; what if another thread is communicating ok? Can't happen due to mutex
- (http-transport:close-connections runremote)
- (mutex-unlock! *http-mutex*)
- ;; (close-connection! fullurl)
- (db:obj->string #f))
- (with-input-from-request ;; was dat
- fullurl
- (list (cons 'key (or server-id "thekey"))
- (cons 'cmd cmd)
- (cons 'params sparams))
- read-string))
- transport: 'http)
- 0)) ;; added this speculatively
- ;; Shouldn't this be a call to the managed call-all-connections stuff above?
- ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections?
- (mutex-unlock! *http-mutex*)
- ))
- (time-out (lambda ()
- (thread-sleep! 45)
- (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
- #f))
- (th1 (make-thread send-recieve "with-input-from-request"))
- (th2 (make-thread time-out "time out")))
- (thread-start! th1)
- (thread-start! th2)
- (thread-join! th1)
- (vector-set! res 0 success)
- (thread-terminate! th2)
- (if (vector? res)
- (if (vector-ref res 0) ;; this is the first flag or the second flag?
- (let* ((res-dat (vector-ref res 1)))
- (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
- (signal (make-composite-condition
- (make-property-condition
- 'servermismatch
- 'message (vector-ref res 1))))
- res)) ;; this is the *inner* vector? seriously? why?
- (if (debug:debug-mode 11)
- (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
- (print-call-chain (current-error-port))
- (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
- (debug:print 11 *default-log-port* " server call chain:")
- (pp (vector-ref res 1) (current-error-port))
- (signal (vector-ref res 0)))
- res))
- (signal (make-composite-condition
- (make-property-condition
- 'timeout
- 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))
-
-;; careful closing of connections stored in *runremote*
-;;
-(define (http-transport:close-connections runremote)
- (if (remote? runremote)
- (let ((api-dat (remote-api-uri runremote)))
- (handle-exceptions
- exn
- (begin
- (print-call-chain *default-log-port*)
- (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
- (if (args:any-defined? "-server" "-execute" "-run")
- (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
- (if api-dat (close-connection! api-dat))
- (remote-conndat-set! runremote #f)
- #t))
- #f))
-
-;; run http-transport:keep-running in a parallel thread to monitor that the db is being
-;; used and to shutdown after sometime if it is not.
-;;
-(define (http-transport:keep-running)
- ;; if none running or if > 20 seconds since
- ;; server last used then start shutdown
- ;; This thread waits for the server to come alive
- (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
- (let* ((servinfofile #f)
- (sdat #f)
- (no-sync-db (db:open-no-sync-db))
- (tmp-area (common:get-db-tmp-area))
- (started-file (conc tmp-area "/.server-started"))
- (server-start-time (current-seconds))
- (server-info (let loop ((start-time (current-seconds))
- (changed #t)
- (last-sdat "not this"))
- (begin ;; let ((sdat #f))
- (thread-sleep! 0.01)
- (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
- (mutex-lock! *heartbeat-mutex*)
- (set! sdat *server-info*)
- (mutex-unlock! *heartbeat-mutex*)
- (if (and sdat
- (not changed)
- (> (- (current-seconds) start-time) 2))
- (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo"))
- (ipaddr (car sdat))
- (port (cadr sdat))
- (servinf (conc servinfodir"/"ipaddr":"port)))
- (set! servinfofile servinf)
- (if (not (file-exists? servinfodir))
- (create-directory servinfodir #t))
- (with-output-to-file servinf
- (lambda ()
- (let* ((serv-id (server:mk-signature)))
- (set! *server-id* serv-id)
- (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))
- (print "started: "(seconds->year-week/day-time (current-seconds))))))
- (set! *on-exit-procs* (cons
- (lambda ()
- (delete-file* servinf))
- *on-exit-procs*))
- ;; put data about this server into a simple flat file host.port
- (debug:print-info 0 *default-log-port* "Received server alive signature")
- sdat)
- (begin
- (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
- (sleep 4)
- (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
- (if sdat
- (let* ((ipaddr (car sdat))
- (port (cadr sdat))
- (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port)))
- (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
- (exit))
- (exit)
- )
- (loop start-time
- (equal? sdat last-sdat)
- sdat)))))))
- (iface (car server-info))
- (port (cadr server-info))
- (last-access 0)
- (server-timeout (server:expiration-timeout))
- (server-going #f)
- (server-log-file (args:get-arg "-log"))) ;; always set when we are a server
-
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
- (with-output-to-file started-file (lambda ()(print (current-process-id)))))
-
- (let loop ((count 0)
- (server-state 'available)
- (bad-sync-count 0)
- (start-time (current-milliseconds)))
-
- ;; Use this opportunity to sync the tmp db to megatest.db
- (if (not server-going) ;; *dbstruct-dbs*
- (begin
- (debug:print 0 *default-log-port* "SERVER: dbprep")
- (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!!
- (set! server-going #t)
- (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
- (if (and no-sync-db
- (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
- (begin
- (if (common:low-noise-print 120 "sync-all-print")
- (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")))
- (db:all-db-sync *dbstruct-dbs*)
- )))
-
- ;; when things go wrong we don't want to be doing the various queries too often
- ;; so we strive to run this stuff only every four seconds or so.
- (let* ((sync-time (- (current-milliseconds) start-time))
- (rem-time (quotient (- 4000 sync-time) 1000)))
- (if (and (<= rem-time 4)
- (> rem-time 0))
- (thread-sleep! rem-time)))
-
- (if (< count 1) ;; 3x3 = 9 secs aprox
- (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
-
- ;; Check that iface and port have not changed (can happen if server port collides)
- (mutex-lock! *heartbeat-mutex*)
- (set! sdat *server-info*)
- (mutex-unlock! *heartbeat-mutex*)
-
- (if (not (equal? sdat (list iface port)))
- (let ((new-iface (car sdat))
- (new-port (cadr sdat)))
- (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
- (set! iface new-iface)
- (set! port new-port)
- (if (not *server-id*)
- (set! *server-id* (server:mk-signature)))
- (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
- (flush-output *default-log-port*)))
-
- ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
- (mutex-lock! *heartbeat-mutex*)
- (set! last-access *db-last-access*)
- (mutex-unlock! *heartbeat-mutex*)
-
- (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
- (begin
- (if (not *server-id*)
- (set! *server-id* (server:mk-signature)))
- (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
- (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
- (flush-output *default-log-port*)))
- (if (common:low-noise-print 60 "dbstats")
- (begin
- (debug:print 0 *default-log-port* "Server stats:")
- (db:print-current-query-stats)))
- (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
- (cond
- ((and *server-run*
- (> (+ last-access server-timeout)
- (current-seconds)))
- (if (common:low-noise-print 120 "server continuing")
- (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
- (let ((curr-time (current-seconds)))
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
- (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
- (not *server-overloaded*)
- (file-exists? servinfofile))
- (change-file-times servinfofile curr-time curr-time)))
- (if (and (common:low-noise-print 120 "start new server")
- (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
- (begin
- (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...")
- (server:kind-run *toppath*)
- (if (> *api-process-request-count* 100)
- (begin
- (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile)
- (delete-file* servinfofile)))))))
- (loop 0 server-state bad-sync-count (current-milliseconds)))
- (else
- (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
- (http-transport:server-shutdown port)))))))
-
-(define (http-transport:server-shutdown port)
- (begin
- ;;(BB> "http-transport:server-shutdown called")
- (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
- ;;
- ;; start_shutdown
- ;;
- (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
- (portlogger:open-run-close portlogger:set-port port "released")
- (thread-sleep! 1)
-
- ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
- ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*)
- ;; (debug:print-info 0 *default-log-port* "Average cached write time "
- ;; (if (eq? *number-of-writes* 0)
- ;; "n/a (no writes)"
- ;; (/ *writes-total-delay*
- ;; *number-of-writes*))
- ;; " ms")
- ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*)
- ;; (debug:print-info 0 *default-log-port* "Average non-cached time "
- ;; (if (eq? *number-non-write-queries* 0)
- ;; "n/a (no queries)"
- ;; (/ *total-non-write-delay*
- ;; *number-non-write-queries*))
- ;; " ms")
-
- (db:print-current-query-stats)
- #;(common:save-pkt `((action . exit)
- (T . server)
- (pid . ,(current-process-id)))
- *configdat* #t)
-
- ;; remove .servinfo file(s) here
-
- (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
- (exit)))
-
-;; all routes though here end in exit ...
-;;
-;; start_server?
-;;
-(define (http-transport:launch)
- ;; check the .servinfo directory, are there other servers running on this
- ;; or another host?
- (let* ((server-start-is-ok (server:minimal-check *toppath*)))
- (if (not server-start-is-ok)
- (begin
- (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.")
- (exit 1))))
-
- ;; check that a server start is in progress, pause or exit if so
- (let* ((th2 (make-thread (lambda ()
- (debug:print-info 0 *default-log-port* "Server run thread started")
- (http-transport:run
- (if (args:get-arg "-server")
- (args:get-arg "-server")
- "-")
- )) "Server run"))
- (th3 (make-thread (lambda ()
- (debug:print-info 0 *default-log-port* "Server monitor thread started")
- (http-transport:keep-running)
- "Keep running"))))
- (thread-start! th2)
- (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
- (thread-start! th3)
- (set! *didsomething* #t)
- (thread-join! th2)
- (exit)))
-
-;; (define (http-transport:server-signal-handler signum)
-;; (signal-mask! signum)
-;; (handle-exceptions
-;; exn
-;; (debug:print 0 *default-log-port* " ... exiting ...")
-;; (let ((th1 (make-thread (lambda ()
-;; (thread-sleep! 1))
-;; "eat response"))
-;; (th2 (make-thread (lambda ()
-;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
-;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
-;; (debug:print 0 *default-log-port* " Done.")
-;; (exit 4))
-;; "exit on ^C timer")))
-;; (thread-start! th2)
-;; (thread-start! th1)
-;; (thread-join! th2))))
-
-;;===============================================
-;; Java script
-;;===============================================
-(define (http-transport:show-jquery)
- (let* ((data (tests:readlines *java-script-lib*)))
-(string-join data "\n")))
-
-
-
-;;======================================================================
-;; web pages
-;;======================================================================
-
-(define (http-transport:html-test-log $)
- (let* ((run-id ($ 'runid))
- (test-item ($ 'testname))
- (parts (string-split test-item ":"))
- (test-name (car parts))
-
- (item-name (if (equal? (length parts) 1)
- ""
- (cadr parts))))
- ;(print $)
-(tests:get-test-log run-id test-name item-name)))
-
-
-(define (http-transport:html-dboard $)
- (let* ((page ($ 'page))
- (oup (open-output-string))
- (bdy "--------------------------")
-
- (ret (tests:dynamic-dboard page)))
- (s:output-new oup ret)
- (close-output-port oup)
-
- (set! bdy (get-output-string oup))
- (conc "
Dashboard
" bdy "
" )))
-
-(define (http-transport:main-page)
- (let ((linkpath (root-path)))
- (conc "" (pathname-strip-directory *toppath*) "
"
- ""
- "Run area: " *toppath*
- "Server Stats
"
- (http-transport:stats-table)
- "
"
- (http-transport:runs linkpath)
- "
"
- ;; (http-transport:run-stats)
- ""
- )))
-
-(define (http-transport:stats-table)
- (mutex-lock! *heartbeat-mutex*)
- (let ((res
- (conc ""
- ;; "Max cached queries | " *max-cache-size* " |
"
- "Number of cached writes | " *number-of-writes* " |
"
- "Average cached write time | " (if (eq? *number-of-writes* 0)
- "n/a (no writes)"
- (/ *writes-total-delay*
- *number-of-writes*))
- " ms |
"
- "Number non-cached queries | " *number-non-write-queries* " |
"
- ;; "Average non-cached time | " (if (eq? *number-non-write-queries* 0)
- ;; "n/a (no queries)"
- ;; (/ *total-non-write-delay*
- ;; *number-non-write-queries*))
- " ms |
"
- "Last access | " (seconds->time-string *db-last-access*) " |
"
- "
")))
- (mutex-unlock! *heartbeat-mutex*)
- res))
-
-(define (http-transport:runs linkpath)
- (conc "Runs
"
- (string-intersperse
- (let ((files (map pathname-strip-directory (glob (conc linkpath "/*")))))
- (map (lambda (p)
- (conc "" p "
"))
- files))
- " ")))
-
-#;(define (http-transport:run-stats)
- (let ((stats (open-run-close db:get-running-stats #f)))
- (conc ""
- (string-intersperse
- (map (lambda (stat)
- (conc "" (car stat) " | " (cadr stat) " |
"))
- stats)
- " ")
- "
")))
+;;
+;; (declare (uses common))
+;; (declare (uses db))
+;; (declare (uses tests))
+;; (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
+;; (declare (uses server))
+;; ;; (declare (uses daemon))
+;; (declare (uses portlogger))
+;; (declare (uses rmt))
+;; (declare (uses dbfile))
+;; (declare (uses commonmod))
+;;
+;; (include "common_records.scm")
+;; (include "db_records.scm")
+;; (include "js-path.scm")
+;;
+;; (import dbfile commonmod)
+;;
+;; (require-library stml)
+;; (define (http-transport:make-server-url hostport)
+;; (if (not hostport)
+;; #f
+;; (conc "http://" (car hostport) ":" (cadr hostport))))
+;;
+;; (define *server-loop-heart-beat* (current-seconds))
+;;
+;; ;;======================================================================
+;; ;; S E R V E R
+;; ;; ======================================================================
+;;
+;; ;; Call this to start the actual server
+;; ;;
+;;
+;; (define *db:process-queue-mutex* (make-mutex))
+;;
+;; (define (http-transport:run hostn)
+;; ;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048)
+;; (debug:print 2 *default-log-port* "Attempting to start the server ...")
+;; (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily
+;; (hostname (get-host-name))
+;; (ipaddrstr (let ((ipstr (if (string=? "-" hostn)
+;; ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
+;; (server:get-best-guess-address hostname)
+;; #f)))
+;; (if ipstr ipstr hostn))) ;; hostname)))
+;; (start-port (portlogger:open-run-close portlogger:find-port))
+;; (link-tree-path (common:get-linktree))
+;; (tmp-area (common:get-db-tmp-area))
+;; (start-file (conc tmp-area "/.server-start")))
+;; (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
+;; ;; set some parameters for the server
+;; (root-path (if link-tree-path
+;; link-tree-path
+;; (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
+;; (handle-directory spiffy-directory-listing)
+;; (handle-exception (lambda (exn chain)
+;; (signal (make-composite-condition
+;; (make-property-condition
+;; 'server
+;; 'message "server error")))))
+;;
+;; ;; http-transport:handle-directory) ;; simple-directory-handler)
+;; ;; Setup the web server and a /ctrl interface
+;; ;;
+;; (vhost-map `(((* any) . ,(lambda (continue)
+;; ;; open the db on the first call
+;; ;; This is were we set up the database connections
+;; (let* (($ (request-vars source: 'both))
+;; (dat ($ 'dat))
+;; (res #f))
+;; (cond
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "api"))
+;; (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc
+;; headers: '((content-type text/plain)))
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! *db-last-access* (current-seconds))
+;; (mutex-unlock! *heartbeat-mutex*))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ ""))
+;; (send-response body: (http-transport:main-page)))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "json_api"))
+;; (send-response body: (http-transport:main-page)))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "runs"))
+;; (send-response body: (http-transport:main-page)))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ any))
+;; (send-response body: "hey there!\n"
+;; headers: '((content-type text/plain))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "hey"))
+;; (send-response body: "hey there!\n"
+;; headers: '((content-type text/plain))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "jquery3.1.0.js"))
+;; (send-response body: (http-transport:show-jquery)
+;; headers: '((content-type application/javascript))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "test_log"))
+;; (send-response body: (http-transport:html-test-log $)
+;; headers: '((content-type text/HTML))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "dashboard"))
+;; (send-response body: (http-transport:html-dboard $)
+;; headers: '((content-type text/HTML))))
+;; (else (continue))))))))
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn)
+;; (with-output-to-file start-file (lambda ()(print (current-process-id)))))
+;; (http-transport:try-start-server ipaddrstr start-port)))
+;;
+;; ;; This is recursively run by http-transport:run until sucessful
+;; ;;
+;; (define (http-transport:try-start-server ipaddrstr portnum)
+;; (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
+;; (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
+;; (if (not config-use-proxy)
+;; (determine-proxy (constantly #f)))
+;; (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; ;; (print-error-message exn)
+;; (if (< portnum 64000)
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
+;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+;; (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+;; (portlogger:open-run-close portlogger:set-failed portnum)
+;; (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
+;; (thread-sleep! 0.1)
+;;
+;; ;; get_next_port goes here
+;; (http-transport:try-start-server ipaddrstr
+;; (portlogger:open-run-close portlogger:find-port)))
+;; (begin
+;; (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server"))))
+;; ;; any error in following steps will result in a retry
+;; (set! *server-info* (list ipaddrstr portnum))
+;; (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
+;; ;; This starts the spiffy server
+;; ;; NEED WAY TO SET IP TO #f TO BIND ALL
+;; ;; (start-server bind-address: ipaddrstr port: portnum)
+;; (if config-hostname ;; this is a hint to bind directly
+;; (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-")
+;; ;; ipaddrstr
+;; ;; config-hostname))
+;; (start-server port: portnum))
+;; (portlogger:open-run-close portlogger:set-port portnum "released")
+;; (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
+;;
+;; ;;======================================================================
+;; ;; S E R V E R U T I L I T I E S
+;; ;;======================================================================
+;;
+;; ;;======================================================================
+;; ;; C L I E N T S
+;; ;;======================================================================
+;;
+;; (define *http-mutex* (make-mutex))
+;;
+;; ;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
+;; ;; I'm pretty sure it is defunct.
+;;
+;; ;; This next block all imported en-mass from the api branch
+;; (define *http-requests-in-progress* 0)
+;; (define *http-connections-next-cleanup* (current-seconds))
+;;
+;; (define (http-transport:get-time-to-cleanup)
+;; (let ((res #f))
+;; (mutex-lock! *http-mutex*)
+;; (set! res (> (current-seconds) *http-connections-next-cleanup*))
+;; (mutex-unlock! *http-mutex*)
+;; res))
+;;
+;; (define (http-transport:inc-requests-count)
+;; (mutex-lock! *http-mutex*)
+;; (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))
+;; ;; Use this opportunity to slow things down iff there are too many requests in flight
+;; (if (> *http-requests-in-progress* 5)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
+;; (thread-sleep! 1)))
+;; (mutex-unlock! *http-mutex*))
+;;
+;; (define (http-transport:dec-requests-count proc)
+;; (mutex-lock! *http-mutex*)
+;; (proc)
+;; (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
+;; (mutex-unlock! *http-mutex*))
+;;
+;; (define (http-transport:dec-requests-count-and-close-all-connections)
+;; (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
+;; (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
+;; (if (> *http-requests-in-progress* 0)
+;; (if (> etime (current-seconds))
+;; (begin
+;; (thread-sleep! 0.05)
+;; (loop etime))
+;; (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
+;; (close-all-connections!)))
+;; (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
+;; (mutex-unlock! *http-mutex*))
+;;
+;; (define (http-transport:inc-requests-and-prep-to-close-all-connections)
+;; (mutex-lock! *http-mutex*)
+;; (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))
+;;
+;; ;; Send "cmd" with json payload "params" to serverdat and receive result
+;; ;;
+;; (define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3))
+;; (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat)
+;; (let* ((fullurl (remote-api-req runremote))
+;; (res (vector #f "uninitialized"))
+;; (success #t)
+;; (sparams (db:obj->string params transport: 'http))
+;; (server-id (remote-server-id runremote)))
+;; (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds))
+;;
+;; ;; set up the http-client here
+;; (max-retry-attempts 1)
+;; ;; consider all requests indempotent
+;; (retry-request? (lambda (request)
+;; #f))
+;; ;; send the data and get the response
+;; ;; extract the needed info from the http data and
+;; ;; process and return it.
+;; (let* ((send-recieve (lambda ()
+;; (mutex-lock! *http-mutex*)
+;; ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
+;; ;; ((exn http client-error) e (print e)))
+;; (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
+;; success
+;; (db:string->obj
+;; (handle-exceptions
+;; exn
+;; (let ((call-chain (get-call-chain))
+;; (msg ((condition-property-accessor 'exn 'message) exn)))
+;; (set! success #f)
+;; (if (debug:debug-mode 1)
+;; (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
+;; (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
+;; (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
+;; (debug:print 0 *default-log-port* " call-chain: " call-chain)))
+;; ;; what if another thread is communicating ok? Can't happen due to mutex
+;; (http-transport:close-connections runremote)
+;; (mutex-unlock! *http-mutex*)
+;; ;; (close-connection! fullurl)
+;; (db:obj->string #f))
+;; (with-input-from-request ;; was dat
+;; fullurl
+;; (list (cons 'key (or server-id "thekey"))
+;; (cons 'cmd cmd)
+;; (cons 'params sparams))
+;; read-string))
+;; transport: 'http)
+;; 0)) ;; added this speculatively
+;; ;; Shouldn't this be a call to the managed call-all-connections stuff above?
+;; ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections?
+;; (mutex-unlock! *http-mutex*)
+;; ))
+;; (time-out (lambda ()
+;; (thread-sleep! 45)
+;; (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
+;; #f))
+;; (th1 (make-thread send-recieve "with-input-from-request"))
+;; (th2 (make-thread time-out "time out")))
+;; (thread-start! th1)
+;; (thread-start! th2)
+;; (thread-join! th1)
+;; (vector-set! res 0 success)
+;; (thread-terminate! th2)
+;; (if (vector? res)
+;; (if (vector-ref res 0) ;; this is the first flag or the second flag?
+;; (let* ((res-dat (vector-ref res 1)))
+;; (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
+;; (signal (make-composite-condition
+;; (make-property-condition
+;; 'servermismatch
+;; 'message (vector-ref res 1))))
+;; res)) ;; this is the *inner* vector? seriously? why?
+;; (if (debug:debug-mode 11)
+;; (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
+;; (print-call-chain (current-error-port))
+;; (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
+;; (debug:print 11 *default-log-port* " server call chain:")
+;; (pp (vector-ref res 1) (current-error-port))
+;; (signal (vector-ref res 0)))
+;; res))
+;; (signal (make-composite-condition
+;; (make-property-condition
+;; 'timeout
+;; 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))
+;;
+;; ;; careful closing of connections stored in *runremote*
+;; ;;
+;; (define (http-transport:close-connections runremote)
+;; (if (remote? runremote)
+;; (let ((api-dat (remote-api-uri runremote)))
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (print-call-chain *default-log-port*)
+;; (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
+;; (if (args:any-defined? "-server" "-execute" "-run")
+;; (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
+;; (if api-dat (close-connection! api-dat))
+;; (remote-conndat-set! runremote #f)
+;; #t))
+;; #f))
+;;
+;; ;; run http-transport:keep-running in a parallel thread to monitor that the db is being
+;; ;; used and to shutdown after sometime if it is not.
+;; ;;
+;; (define (http-transport:keep-running)
+;; ;; if none running or if > 20 seconds since
+;; ;; server last used then start shutdown
+;; ;; This thread waits for the server to come alive
+;; (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+;; (let* ((servinfofile #f)
+;; (sdat #f)
+;; (no-sync-db (db:open-no-sync-db))
+;; (tmp-area (common:get-db-tmp-area))
+;; (started-file (conc tmp-area "/.server-started"))
+;; (server-start-time (current-seconds))
+;; (server-info (let loop ((start-time (current-seconds))
+;; (changed #t)
+;; (last-sdat "not this"))
+;; (begin ;; let ((sdat #f))
+;; (thread-sleep! 0.01)
+;; (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! sdat *server-info*)
+;; (mutex-unlock! *heartbeat-mutex*)
+;; (if (and sdat
+;; (not changed)
+;; (> (- (current-seconds) start-time) 2))
+;; (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo"))
+;; (ipaddr (car sdat))
+;; (port (cadr sdat))
+;; (servinf (conc servinfodir"/"ipaddr":"port)))
+;; (set! servinfofile servinf)
+;; (if (not (file-exists? servinfodir))
+;; (create-directory servinfodir #t))
+;; (with-output-to-file servinf
+;; (lambda ()
+;; (let* ((serv-id (server:mk-signature)))
+;; (set! *server-id* serv-id)
+;; (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))
+;; (print "started: "(seconds->year-week/day-time (current-seconds))))))
+;; (set! *on-exit-procs* (cons
+;; (lambda ()
+;; (delete-file* servinf))
+;; *on-exit-procs*))
+;; ;; put data about this server into a simple flat file host.port
+;; (debug:print-info 0 *default-log-port* "Received server alive signature")
+;; sdat)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+;; (sleep 4)
+;; (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+;; (if sdat
+;; (let* ((ipaddr (car sdat))
+;; (port (cadr sdat))
+;; (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port)))
+;; (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+;; (exit))
+;; (exit)
+;; )
+;; (loop start-time
+;; (equal? sdat last-sdat)
+;; sdat)))))))
+;; (iface (car server-info))
+;; (port (cadr server-info))
+;; (last-access 0)
+;; (server-timeout (server:expiration-timeout))
+;; (server-going #f)
+;; (server-log-file (args:get-arg "-log"))) ;; always set when we are a server
+;;
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
+;; (with-output-to-file started-file (lambda ()(print (current-process-id)))))
+;;
+;; (let loop ((count 0)
+;; (server-state 'available)
+;; (bad-sync-count 0)
+;; (start-time (current-milliseconds)))
+;;
+;; ;; Use this opportunity to sync the tmp db to megatest.db
+;; (if (not server-going) ;; *dbstruct-dbs*
+;; (begin
+;; (debug:print 0 *default-log-port* "SERVER: dbprep")
+;; (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!!
+;; (set! server-going #t)
+;; (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
+;; (if (and no-sync-db
+;; (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
+;; (begin
+;; (if (common:low-noise-print 120 "sync-all-print")
+;; (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")))
+;; (db:all-db-sync *dbstruct-dbs*)
+;; )))
+;;
+;; ;; when things go wrong we don't want to be doing the various queries too often
+;; ;; so we strive to run this stuff only every four seconds or so.
+;; (let* ((sync-time (- (current-milliseconds) start-time))
+;; (rem-time (quotient (- 4000 sync-time) 1000)))
+;; (if (and (<= rem-time 4)
+;; (> rem-time 0))
+;; (thread-sleep! rem-time)))
+;;
+;; (if (< count 1) ;; 3x3 = 9 secs aprox
+;; (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
+;;
+;; ;; Check that iface and port have not changed (can happen if server port collides)
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! sdat *server-info*)
+;; (mutex-unlock! *heartbeat-mutex*)
+;;
+;; (if (not (equal? sdat (list iface port)))
+;; (let ((new-iface (car sdat))
+;; (new-port (cadr sdat)))
+;; (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+;; (set! iface new-iface)
+;; (set! port new-port)
+;; (if (not *server-id*)
+;; (set! *server-id* (server:mk-signature)))
+;; (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
+;; (flush-output *default-log-port*)))
+;;
+;; ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! last-access *db-last-access*)
+;; (mutex-unlock! *heartbeat-mutex*)
+;;
+;; (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
+;; (begin
+;; (if (not *server-id*)
+;; (set! *server-id* (server:mk-signature)))
+;; (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
+;; (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
+;; (flush-output *default-log-port*)))
+;; (if (common:low-noise-print 60 "dbstats")
+;; (begin
+;; (debug:print 0 *default-log-port* "Server stats:")
+;; (db:print-current-query-stats)))
+;; (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
+;; (cond
+;; ((and *server-run*
+;; (> (+ last-access server-timeout)
+;; (current-seconds)))
+;; (if (common:low-noise-print 120 "server continuing")
+;; (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
+;; (let ((curr-time (current-seconds)))
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
+;; (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
+;; (not *server-overloaded*)
+;; (file-exists? servinfofile))
+;; (change-file-times servinfofile curr-time curr-time)))
+;; (if (and (common:low-noise-print 120 "start new server")
+;; (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...")
+;; (server:kind-run *toppath*)
+;; (if (> *api-process-request-count* 100)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile)
+;; (delete-file* servinfofile)))))))
+;; (loop 0 server-state bad-sync-count (current-milliseconds)))
+;; (else
+;; (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+;; (http-transport:server-shutdown port)))))))
+;;
+;; (define (http-transport:server-shutdown port)
+;; (begin
+;; ;;(BB> "http-transport:server-shutdown called")
+;; (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+;; ;;
+;; ;; start_shutdown
+;; ;;
+;; (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
+;; (portlogger:open-run-close portlogger:set-port port "released")
+;; (thread-sleep! 1)
+;;
+;; ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
+;; ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*)
+;; ;; (debug:print-info 0 *default-log-port* "Average cached write time "
+;; ;; (if (eq? *number-of-writes* 0)
+;; ;; "n/a (no writes)"
+;; ;; (/ *writes-total-delay*
+;; ;; *number-of-writes*))
+;; ;; " ms")
+;; ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*)
+;; ;; (debug:print-info 0 *default-log-port* "Average non-cached time "
+;; ;; (if (eq? *number-non-write-queries* 0)
+;; ;; "n/a (no queries)"
+;; ;; (/ *total-non-write-delay*
+;; ;; *number-non-write-queries*))
+;; ;; " ms")
+;;
+;; (db:print-current-query-stats)
+;; #;(common:save-pkt `((action . exit)
+;; (T . server)
+;; (pid . ,(current-process-id)))
+;; *configdat* #t)
+;;
+;; ;; remove .servinfo file(s) here
+;;
+;; (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
+;; (exit)))
+;;
+;; ;; all routes though here end in exit ...
+;; ;;
+;; ;; start_server?
+;; ;;
+;; (define (http-transport:launch)
+;; ;; check the .servinfo directory, are there other servers running on this
+;; ;; or another host?
+;; (let* ((server-start-is-ok (server:minimal-check *toppath*)))
+;; (if (not server-start-is-ok)
+;; (begin
+;; (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.")
+;; (exit 1))))
+;;
+;; ;; check that a server start is in progress, pause or exit if so
+;; (let* ((th2 (make-thread (lambda ()
+;; (debug:print-info 0 *default-log-port* "Server run thread started")
+;; (http-transport:run
+;; (if (args:get-arg "-server")
+;; (args:get-arg "-server")
+;; "-")
+;; )) "Server run"))
+;; (th3 (make-thread (lambda ()
+;; (debug:print-info 0 *default-log-port* "Server monitor thread started")
+;; (http-transport:keep-running)
+;; "Keep running"))))
+;; (thread-start! th2)
+;; (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
+;; (thread-start! th3)
+;; (set! *didsomething* #t)
+;; (thread-join! th2)
+;; (exit)))
+;;
+;; ;; (define (http-transport:server-signal-handler signum)
+;; ;; (signal-mask! signum)
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (debug:print 0 *default-log-port* " ... exiting ...")
+;; ;; (let ((th1 (make-thread (lambda ()
+;; ;; (thread-sleep! 1))
+;; ;; "eat response"))
+;; ;; (th2 (make-thread (lambda ()
+;; ;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
+;; ;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
+;; ;; (debug:print 0 *default-log-port* " Done.")
+;; ;; (exit 4))
+;; ;; "exit on ^C timer")))
+;; ;; (thread-start! th2)
+;; ;; (thread-start! th1)
+;; ;; (thread-join! th2))))
+;;
+;; ;;===============================================
+;; ;; Java script
+;; ;;===============================================
+;; (define (http-transport:show-jquery)
+;; (let* ((data (tests:readlines *java-script-lib*)))
+;; (string-join data "\n")))
+;;
+;;
+;;
+;; ;;======================================================================
+;; ;; web pages
+;; ;;======================================================================
+;;
+;; (define (http-transport:html-test-log $)
+;; (let* ((run-id ($ 'runid))
+;; (test-item ($ 'testname))
+;; (parts (string-split test-item ":"))
+;; (test-name (car parts))
+;;
+;; (item-name (if (equal? (length parts) 1)
+;; ""
+;; (cadr parts))))
+;; ;(print $)
+;; (tests:get-test-log run-id test-name item-name)))
+;;
+;;
+;; (define (http-transport:html-dboard $)
+;; (let* ((page ($ 'page))
+;; (oup (open-output-string))
+;; (bdy "--------------------------")
+;;
+;; (ret (tests:dynamic-dboard page)))
+;; (s:output-new oup ret)
+;; (close-output-port oup)
+;;
+;; (set! bdy (get-output-string oup))
+;; (conc "Dashboard
" bdy "
" )))
+;;
+;; (define (http-transport:main-page)
+;; (let ((linkpath (root-path)))
+;; (conc "" (pathname-strip-directory *toppath*) "
"
+;; ""
+;; "Run area: " *toppath*
+;; "Server Stats
"
+;; (http-transport:stats-table)
+;; "
"
+;; (http-transport:runs linkpath)
+;; "
"
+;; ;; (http-transport:run-stats)
+;; ""
+;; )))
+;;
+;; (define (http-transport:stats-table)
+;; (mutex-lock! *heartbeat-mutex*)
+;; (let ((res
+;; (conc ""
+;; ;; "Max cached queries | " *max-cache-size* " |
"
+;; "Number of cached writes | " *number-of-writes* " |
"
+;; "Average cached write time | " (if (eq? *number-of-writes* 0)
+;; "n/a (no writes)"
+;; (/ *writes-total-delay*
+;; *number-of-writes*))
+;; " ms |
"
+;; "Number non-cached queries | " *number-non-write-queries* " |
"
+;; ;; "Average non-cached time | " (if (eq? *number-non-write-queries* 0)
+;; ;; "n/a (no queries)"
+;; ;; (/ *total-non-write-delay*
+;; ;; *number-non-write-queries*))
+;; " ms |
"
+;; "Last access | " (seconds->time-string *db-last-access*) " |
"
+;; "
")))
+;; (mutex-unlock! *heartbeat-mutex*)
+;; res))
+;;
+;; (define (http-transport:runs linkpath)
+;; (conc "Runs
"
+;; (string-intersperse
+;; (let ((files (map pathname-strip-directory (glob (conc linkpath "/*")))))
+;; (map (lambda (p)
+;; (conc "" p "
"))
+;; files))
+;; " ")))
+;;
+;; #;(define (http-transport:run-stats)
+;; (let ((stats (open-run-close db:get-running-stats #f)))
+;; (conc ""
+;; (string-intersperse
+;; (map (lambda (stat)
+;; (conc "" (car stat) " | " (cadr stat) " |
"))
+;; stats)
+;; " ")
+;; "
")))
+;;
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -25,12 +25,10 @@
(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses margs))
(declare (uses runs))
(declare (uses launch))
-(declare (uses server))
-(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))
(declare (uses db))
@@ -43,10 +41,13 @@
(declare (uses env))
(declare (uses diff-report))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbmod.import))
+(declare (uses rmtmod))
+(declare (uses clientmod))
+(declare (uses servermod))
(declare (uses commonmod))
(declare (uses commonmod.import))
(declare (uses dbfile))
(declare (uses dbfile.import))
;; (declare (uses debugprint))
Index: mt.scm
==================================================================
--- mt.scm
+++ mt.scm
@@ -24,13 +24,13 @@
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
-(declare (uses server))
+(declare (uses servermod))
(declare (uses runs))
-(declare (uses rmt))
+(declare (uses rmtmod))
;; (declare (uses filedb))
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
ADDED mtserv.scm
Index: mtserv.scm
==================================================================
--- /dev/null
+++ mtserv.scm
@@ -0,0 +1,94 @@
+; Copyright 2006-2017, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+;;
+
+;; (include "common.scm")
+;; (include "megatest-version.scm")
+
+;; fake out readline usage of toplevel-command
+(define (toplevel-command . a) #f)
+
+(use srfi-1 posix srfi-69 readline ;; regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras)
+ srfi-19 srfi-18 extras format pkts regex regex-case
+ (prefix dbi dbi:)
+ )
+
+;; (declare (uses common))
+(declare (uses margs))
+(declare (uses configf))
+;; (declare (uses rmt))
+
+;; (use ducttape-lib)
+(include "megatest-version.scm")
+(include "megatest-fossil-hash.scm")
+
+;; (require-library stml)
+
+(define help (conc "
+mtutil, part of the Megatest tool suite, documentation at http://www.kiatoa.com/fossils/megatest
+ version " megatest-version "
+ license GPL, Copyright Matt Welland 2006-2017
+
+Usage: mtutil action [options]
+ -h : this help
+ -manual : show the Megatest user manual
+ -version : print megatest version (currently " megatest-version ")
+
+Examples:
+
+Called as " (string-intersperse (argv) " ") "
+Version " megatest-version ", built from " megatest-fossil-hash ))
+ ;; first token is our action, but only if no leading dash
+
+(define *action* (if (and (> (length (argv)) 1)
+ (not (string-match "^\\-.*" (cadr (argv)))))
+ (cadr (argv))
+ #f))
+
+(define *remargs*
+ (args:get-args
+ (if *action* (cdr (argv)) (argv))
+ '("-log")
+ '("-h")
+ args:arg-hash
+ 0))
+
+(if (args:get-arg "-h")
+ (begin
+ (print help)
+ (exit)))
+
+(if (or (args:get-arg "-repl")
+ (args:get-arg "-load"))
+ (begin
+ (import extras) ;; might not be needed
+ ;; (import csi)
+ (import readline)
+ (import apropos)
+ ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ...
+
+ (install-history-file (get-environment-variable "HOME") ".mtutil_history") ;; [homedir] [filename] [nlines])
+ (current-input-port (make-readline-port "mtutil> "))
+ (if (args:get-arg "-repl")
+ (repl)
+ (load (args:get-arg "-load")))))
+
+#|
+(define mtconf (car (simple-setup #f)))
+(define dat (common:with-queue-db mtconf (lambda (conn)(get-pkts conn '()))))
+(pp (pkts#flatten-all dat '((cmd . ((parent . P)(url . M)))(runtype . ((parent . P)))) 'id 'group-id 'uuid 'parent 'pkt-type 'pkt 'processed))
+|#
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -22,1056 +22,1058 @@
(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
(declare (uses dbfile))
-(include "common_records.scm")
-;; (declare (uses rmtmod))
-
-(import dbfile) ;; rmtmod)
-
-;;
-;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
-;;
-
-;; generate entries for ~/.megatestrc with the following
-;;
-;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
-
-;;======================================================================
-;; S U P P O R T F U N C T I O N S
-;;======================================================================
-
-;; if a server is either running or in the process of starting call client:setup
-;; else return #f to let the calling proc know that there is no server available
-;;
-(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
- (let* ((cinfo (if (remote? runremote)
- (remote-conndat runremote)
- #f)))
- (if cinfo
- cinfo
- (if (server:check-if-running areapath)
- (client:setup areapath runremote)
- #f))))
-
-(define (rmt:on-homehost? runremote)
- (let* ((hh-dat (remote-hh-dat runremote)))
- (if (pair? hh-dat)
- (cdr hh-dat)
- (begin
- (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
- #f))))
-
-
-;;======================================================================
-
-(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
-
-;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
-;;
-(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
-
- #;(common:telemetry-log (conc "rmt:"(->string cmd))
- payload: `((rid . ,rid)
- (params . ,params)))
-
- (if (> attemptnum 2)
- (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
-
- (cond
- ((> attemptnum 2) (thread-sleep! 0.05))
- ((> attemptnum 10) (thread-sleep! 0.5))
- ((> attemptnum 20) (thread-sleep! 1)))
- (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))
- (begin (server:run *toppath*) (thread-sleep! 3)))
-
-
- ;;DOT digraph megatest_state_status {
- ;;DOT ranksep=0;
- ;;DOT // rankdir=LR;
- ;;DOT node [shape="box"];
- ;;DOT "rmt:send-receive" -> MUTEXLOCK;
- ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
- ;; do all the prep locked under the rmt-mutex
- (mutex-lock! *rmt-mutex*)
-
- ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
- ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
- ;; 3. do the query, if on homehost use local access
- ;;
- (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
- (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
- (runremote (or area-dat
- *runremote*))
- (attemptnum (+ 1 attemptnum))
- (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
-
- ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
- ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
- ;; ensure we have a record for our connection for given area
- (if (not runremote) ;; can remove this one. should never get here.
- (begin
- (set! *runremote* (make-remote))
- (let* ((server-info (remote-server-info *runremote*)))
- (if server-info
- (begin
- (remote-server-url-set! *runremote* (server:record->url server-info))
- (remote-server-id-set! *runremote* (server:record->id server-info)))))
- (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
-
- ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
- ;; DOT SET_HOMEHOST -> MUTEXLOCK;
- ;; ensure we have a homehost record
- (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
- (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
- (let ((hh-data (server:choose-server areapath 'homehost)))
- (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
-
- ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
- (cond
- #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
- (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
- (set! *runremote* #f)
- ;; BUG: close-connections should go here?
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
-
- ;;DOT EXIT;
- ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
- ;; give up if more than 150 attempts
- ((> attemptnum 150)
- (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
- (exit 1))
-
- ;;DOT CASE2 [label="local\nreadonly\nquery"];
- ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
- ;;DOT CASE2 -> "rmt:open-qry-close-locally";
- ;; readonly mode, read request- handle it - case 2
- ((and readonly-mode
- (member cmd api:read-only-queries))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
- (rmt:open-qry-close-locally cmd 0 params)
- )
-
- ;;DOT CASE3 [label="write in\nread-only mode"];
- ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
- ;;DOT CASE3 -> "#f";
- ;; readonly mode, write request. Do nothing, return #f
- (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
-
- ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
- ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
- ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
- ;;
- ;;DOT CASE4 [label="reset\nconnection"];
- ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
- ;;DOT CASE4 -> "rmt:send-receive";
- ;; reset the connection if it has been unused too long
- ((and runremote
- ;; (remote-conndat runremote)
- (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
- (+ (remote-last-access runremote)
- (remote-server-timeout runremote))))
- (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
- (http-transport:close-connections runremote)
- ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
- ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE5 [label="local\nread"];
- ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
- ;;DOT CASE5 -> "rmt:open-qry-close-locally";
-
- ;; on homehost and this is a read
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (rmt:on-homehost? runremote)
- (member cmd api:read-only-queries)) ;; this is a read
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE6 [label="init\nremote"];
- ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
- ;;DOT CASE6 -> "rmt:send-receive";
- ;; on homehost and this is a write, we already have a server, but server has died
-
- ;; reinstate this keep-alive section but inject a time condition into the (add ...
- ;;
- ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost
- ;; (not (member cmd api:read-only-queries)) ;; this is a write
- ;; (remote-server-url runremote) ;; have a server
- ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
- ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
- ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
- ;; (set! *runremote* (make-remote))
- ;; (let* ((server-info (remote-server-info *runremote*)))
- ;; (if server-info
- ;; (begin
- ;; (remote-server-url-set! *runremote* (server:record->url server-info))
- ;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
- ;; (remote-force-server-set! runremote (common:force-server?))
- ;; (mutex-unlock! *rmt-mutex*)
- ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
- ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE7 [label="homehost\nwrite"];
- ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
- ;;DOT CASE7 -> "rmt:open-qry-close-locally";
- ;; on homehost and this is a write, we already have a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; on homehost
- (not (member cmd api:read-only-queries)) ;; this is a write
- (remote-server-url runremote)) ;; have a server
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE8 [label="force\nserver"];
- ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
- ;;DOT CASE8 -> "rmt:open-qry-close-locally";
- ;; on homehost, no server contact made and this is a write, passively start a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; have homehost
- (not (remote-server-url runremote)) ;; no connection yet
- (not (member cmd api:read-only-queries))) ;; not a read-only query
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
- (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
- (if server-info
- (begin
- (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
- (remote-server-id-set! runremote (server:record->id server-info)))
- (if (common:force-server?)
- (server:start-and-wait *toppath*)
- (server:kind-run *toppath*)))
- (remote-force-server-set! runremote (common:force-server?))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
- (rmt:open-qry-close-locally cmd 0 params)))
-
- ;;DOT CASE9 [label="force server\nnot on homehost"];
- ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
- ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
- ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
- (not (remote-conndat runremote)))
- (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
- (not (remote-conndat runremote)))) ;; and no connection
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
- (mutex-unlock! *rmt-mutex*)
- (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
- (server:start-and-wait *toppath*))
- (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
- (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
-
- ;;DOT CASE10 [label="on homehost"];
- ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
- ;;DOT CASE10 -> "rmt:open-qry-close-locally";
- ;; all set up if get this far, dispatch the query
- ((and (not (remote-force-server runremote))
- (cdr (remote-hh-dat runremote))) ;; we are on homehost
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
- (rmt:open-qry-close-locally cmd (if rid rid 0) params))
-
- ;;DOT CASE11 [label="send_receive"];
- ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
- ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
- ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
- ;; not on homehost, do server query
- (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
- ;;DOT }
-
-;; bunch of small functions factored out of send-receive to make debug easier
-;;
-
-(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
- ;; (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
- ;; (mutex-lock! *rmt-mutex*)
- (let* ((conninfo (remote-conndat runremote))
- (dat-in (condition-case ;; handling here has
- ;; caused a lot of
- ;; problems. However it
- ;; is needed to deal with
- ;; attemtped
- ;; communication to
- ;; servers that have gone
- ;; away
- (http-transport:client-api-send-receive 0 runremote cmd params)
- ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
- ((servermismatch) (vector #f "Server id mismatch" ))
- ((commfail)(vector #f "communications fail"))
- ((exn)(vector #f "other fail" (print-call-chain)))))
- (dat (if (and (vector? dat-in) ;; ... check it is a correct size
- (> (vector-length dat-in) 1))
- dat-in
- (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
- (success (if (vector? dat) (vector-ref dat 0) #f))
- (res (if (vector? dat) (vector-ref dat 1) #f)))
- (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
- (remote-last-access-set! runremote (current-seconds)) ;; refresh access time
- (begin
- (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
- (set! conninfo #f)
- (http-transport:close-connections runremote)))
- (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
- (mutex-unlock! *rmt-mutex*)
- (if success ;; success only tells us that the transport was
- ;; successful, have to examine the data to see if
- ;; there was a detected issue at the other end
- (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
- (begin
- (debug:print-error 0 *default-log-port* " dat=" dat)
- (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
- )))
-
-(define (rmt:print-db-stats)
- (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
- (debug:print 18 *default-log-port* "DB Stats\n========")
- (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
- (for-each (lambda (cmd)
- (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
- (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
- (sort (hash-table-keys *db-stats*)
- (lambda (a b)
- (> (vector-ref (hash-table-ref *db-stats* a) 0)
- (vector-ref (hash-table-ref *db-stats* b) 0)))))))
-
-(define (rmt:get-max-query-average run-id)
- (mutex-lock! *db-stats-mutex*)
- (let* ((runkey (conc "run-id=" run-id " "))
- (cmds (filter (lambda (x)
- (substring-index runkey x))
- (hash-table-keys *db-stats*)))
- (res (if (null? cmds)
- (cons 'none 0)
- (let loop ((cmd (car cmds))
- (tal (cdr cmds))
- (max-cmd (car cmds))
- (res 0))
- (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
- (tot (vector-ref cmd-dat 0))
- (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
- (currmax (max res curravg))
- (newmax-cmd (if (> curravg res) cmd max-cmd)))
- (if (null? tal)
- (if (> tot 10)
- (cons newmax-cmd currmax)
- (cons 'none 0))
- (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
- (mutex-unlock! *db-stats-mutex*)
- res))
-
-(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
- (let* ((qry-is-write (not (member cmd api:read-only-queries)))
- (db-file-path (db:dbfile-path)) ;; 0))
- (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
- (read-only (not (file-write-access? db-file-path)))
- (start (current-milliseconds))
- (resdat (if (not (and read-only qry-is-write))
- (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
- ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
- ;; exn ;; This is an attempt to detect that situation and recover gracefully
- ;; (begin
- ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
- ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
- (if (and (vector? v)
- (> (vector-length v) 1))
- (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
- newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
- (vector #t '()))) ;; ) ;; we could also check that the returned types are valid
- (vector #t '())))
- (success (vector-ref resdat 0))
- (res (vector-ref resdat 1))
- (duration (- (current-milliseconds) start)))
- (if (and read-only qry-is-write)
- (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
- (if (not success)
- (if (> remretries 0)
- (begin
- (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
- (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
- (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
- (begin
- (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
- #f))
- (begin
- ;; (rmt:update-db-stats run-id cmd params duration)
- ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
- (if qry-is-write
- (let ((start-time (current-seconds)))
- (mutex-lock! *db-multi-sync-mutex*)
-/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
- (mutex-unlock! *db-multi-sync-mutex*)))))
- res))
-
-(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
- (let* ((run-id (if run-id run-id 0))
- (res (http-transport:client-api-send-receive run-id runremote cmd params)))
- (if (and res (vector-ref res 0))
- (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
- #f)))
-
-;;======================================================================
-;;
-;; A C T U A L A P I C A L L S
-;;
-;;======================================================================
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-(define (rmt:kill-server run-id)
- (rmt:send-receive 'kill-server run-id (list run-id)))
-
-(define (rmt:start-server run-id)
- (rmt:send-receive 'start-server 0 (list run-id)))
-
-;;======================================================================
-;; M I S C
-;;======================================================================
-
-(define (rmt:login run-id)
- (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))
-
-;; This login does no retries under the hood - it acts a bit like a ping.
-;; Deprecated for nmsg-transport.
-;;
-(define (rmt:login-no-auto-client-setup runremote)
- (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
-
-;; hand off a call to one of the db:queries statements
-;; added run-id to make looking up the correct db possible
-;;
-(define (rmt:general-call stmtname run-id . params)
- (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
-
-
-;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
-(define (rmt:get-latest-host-load hostname)
- (rmt:send-receive 'get-latest-host-load 0 (list hostname)))
-
-(define (rmt:sdb-qry qry val run-id)
- ;; add caching if qry is 'getid or 'getstr
- (rmt:send-receive 'sdb-qry run-id (list qry val)))
-
-;; NOT COMPLETED
-(define (rmt:runtests user run-id testpatt params)
- (rmt:send-receive 'runtests run-id testpatt))
-
-(define (rmt:get-run-record-ids target run keynames test-patt)
- (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))
-
-(define (rmt:get-changed-record-ids since-time)
- (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )
-
-(define (rmt:drop-all-triggers)
- (rmt:send-receive 'drop-all-triggers #f '()))
-
-(define (rmt:create-all-triggers)
- (rmt:send-receive 'create-all-triggers #f '()))
-
-;;======================================================================
-;; T E S T M E T A
-;;======================================================================
-
-(define (rmt:get-tests-tags)
- (rmt:send-receive 'get-tests-tags #f '()))
-
-;;======================================================================
-;; K E Y S
-;;======================================================================
-
-;; These require run-id because the values come from the run!
-;;
-(define (rmt:get-key-val-pairs run-id)
- (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
-
-(define (rmt:get-keys)
- (if *db-keys* *db-keys*
- (let ((res (rmt:send-receive 'get-keys #f '())))
- (set! *db-keys* res)
- res)))
-
-(define (rmt:get-keys-write) ;; dummy query to force server start
- (let ((res (rmt:send-receive 'get-keys-write #f '())))
- (set! *db-keys* res)
- res))
-
-;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
-;; to cache the resuls in a hash
-;;
-(define (rmt:get-key-vals run-id)
- (or (hash-table-ref/default *keyvals* run-id #f)
- (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
- (hash-table-set! *keyvals* run-id res)
- res)))
-
-(define (rmt:get-targets)
- (rmt:send-receive 'get-targets #f '()))
-
-(define (rmt:get-target run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-target run-id (list run-id)))
-
-(define (rmt:get-run-times runpatt targetpatt)
- (rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
-
-
-;;======================================================================
-;; T E S T S
-;;======================================================================
-
-;; Just some syntatic sugar
-(define (rmt:register-test run-id test-name item-path)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:general-call 'register-test run-id run-id test-name item-path))
-
-(define (rmt:get-test-id run-id testname item-path)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
-
-;; run-id is NOT used
-;;
-(define (rmt:get-test-info-by-id run-id test-id)
- (if (number? test-id)
- (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
- (begin
- (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
- (print-call-chain (current-error-port))
- #f)))
-
-(define (rmt:test-get-rundir-from-test-id run-id test-id)
- (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
-
-(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (let* ((test-path (if (string? work-area)
- work-area
- (rmt:test-get-rundir-from-test-id run-id test-id))))
- (debug:print 3 *default-log-port* "TEST PATH: " test-path)
- (open-test-db test-path)))
-
-;; WARNING: This currently bypasses the transaction wrapped writes system
-(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
-
-(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
-
-(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
- (assert (number? run-id) "FATAL: Run id required.")
- ;; (if (number? run-id)
- (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
- ;; (begin
- ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
- ;; (print-call-chain (current-error-port))
- ;; '())))
-
-(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
-
-;; get stuff via synchash
-(define (rmt:synchash-get run-id proc synckey keynum params)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
-
-(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
-
-;; IDEA: Threadify these - they spend a lot of time waiting ...
-;;
-(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
- (let ((multi-run-mutex (make-mutex))
- (run-id-list (if run-ids
- run-ids
- (rmt:get-all-run-ids)))
- (result '()))
- (if (null? run-id-list)
- '()
- (let loop ((hed (car run-id-list))
- (tal (cdr run-id-list))
- (threads '()))
- (if (> (length threads) 5)
- (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
- (let* ((newthread (make-thread
- (lambda ()
- (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
- (if (list? res)
- (begin
- (mutex-lock! multi-run-mutex)
- (set! result (append result res))
- (mutex-unlock! multi-run-mutex))
- (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
- (conc "multi-run-thread for run-id " hed)))
- (newthreads (cons newthread threads)))
- (thread-start! newthread)
- (thread-sleep! 0.05) ;; give that thread some time to start
- (if (null? tal)
- newthreads
- (loop (car tal)(cdr tal) newthreads))))))
- result))
-
+
+;; (include "common_records.scm")
+;; ;; (declare (uses rmtmod))
+;;
+;; (import dbfile) ;; rmtmod)
+;;
+;; ;;
+;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
+;; ;;
+;;
+;; ;; generate entries for ~/.megatestrc with the following
+;; ;;
+;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
+;;
+;; ;;======================================================================
+;; ;; S U P P O R T F U N C T I O N S
+;; ;;======================================================================
+;;
+;; ;; if a server is either running or in the process of starting call client:setup
+;; ;; else return #f to let the calling proc know that there is no server available
+;; ;;
+;; (define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
+;; (let* ((cinfo (if (remote? runremote)
+;; (remote-conndat runremote)
+;; #f)))
+;; (if cinfo
+;; cinfo
+;; (if (server:check-if-running areapath)
+;; (client:setup areapath runremote)
+;; #f))))
+;;
+;; (define (rmt:on-homehost? runremote)
+;; (let* ((hh-dat (remote-hh-dat runremote)))
+;; (if (pair? hh-dat)
+;; (cdr hh-dat)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
+;; #f))))
+;;
+;;
+;; ;;======================================================================
+;;
+;; (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
+;;
+;; ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
+;; ;;
+;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
+;;
+;; #;(common:telemetry-log (conc "rmt:"(->string cmd))
+;; payload: `((rid . ,rid)
+;; (params . ,params)))
+;;
+;; (if (> attemptnum 2)
+;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
+;;
+;; (cond
+;; ((> attemptnum 2) (thread-sleep! 0.05))
+;; ((> attemptnum 10) (thread-sleep! 0.5))
+;; ((> attemptnum 20) (thread-sleep! 1)))
+;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))
+;; (begin (server:run *toppath*) (thread-sleep! 3)))
+;;
+;;
+;; ;;DOT digraph megatest_state_status {
+;; ;;DOT ranksep=0;
+;; ;;DOT // rankdir=LR;
+;; ;;DOT node [shape="box"];
+;; ;;DOT "rmt:send-receive" -> MUTEXLOCK;
+;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
+;; ;; do all the prep locked under the rmt-mutex
+;; (mutex-lock! *rmt-mutex*)
+;;
+;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
+;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
+;; ;; 3. do the query, if on homehost use local access
+;; ;;
+;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
+;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
+;; (runremote (or area-dat
+;; *runremote*))
+;; (attemptnum (+ 1 attemptnum))
+;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
+;;
+;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
+;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
+;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
+;; ;; ensure we have a record for our connection for given area
+;; (if (not runremote) ;; can remove this one. should never get here.
+;; (begin
+;; (set! *runremote* (make-remote))
+;; (let* ((server-info (remote-server-info *runremote*)))
+;; (if server-info
+;; (begin
+;; (remote-server-url-set! *runremote* (server:record->url server-info))
+;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
+;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
+;;
+;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
+;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
+;; ;; DOT SET_HOMEHOST -> MUTEXLOCK;
+;; ;; ensure we have a homehost record
+;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
+;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
+;; (let ((hh-data (server:choose-server areapath 'homehost)))
+;; (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
+;;
+;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
+;; (cond
+;; #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
+;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
+;; (set! *runremote* #f)
+;; ;; BUG: close-connections should go here?
+;; (mutex-unlock! *rmt-mutex*)
+;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
+;;
+;; ;;DOT EXIT;
+;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
+;; ;; give up if more than 150 attempts
+;; ((> attemptnum 150)
+;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
+;; (exit 1))
+;;
+;; ;;DOT CASE2 [label="local\nreadonly\nquery"];
+;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
+;; ;;DOT CASE2 -> "rmt:open-qry-close-locally";
+;; ;; readonly mode, read request- handle it - case 2
+;; ((and readonly-mode
+;; (member cmd api:read-only-queries))
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
+;; (rmt:open-qry-close-locally cmd 0 params)
+;; )
+;;
+;; ;;DOT CASE3 [label="write in\nread-only mode"];
+;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
+;; ;;DOT CASE3 -> "#f";
+;; ;; readonly mode, write request. Do nothing, return #f
+;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
+;;
+;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
+;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
+;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
+;; ;;
+;; ;;DOT CASE4 [label="reset\nconnection"];
+;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
+;; ;;DOT CASE4 -> "rmt:send-receive";
+;; ;; reset the connection if it has been unused too long
+;; ((and runremote
+;; ;; (remote-conndat runremote)
+;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
+;; (+ (remote-last-access runremote)
+;; (remote-server-timeout runremote))))
+;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
+;; (http-transport:close-connections runremote)
+;; ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
+;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
+;; (mutex-unlock! *rmt-mutex*)
+;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
+;;
+;; ;;DOT CASE5 [label="local\nread"];
+;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
+;; ;;DOT CASE5 -> "rmt:open-qry-close-locally";
+;;
+;; ;; on homehost and this is a read
+;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; (rmt:on-homehost? runremote)
+;; (member cmd api:read-only-queries)) ;; this is a read
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
+;; (rmt:open-qry-close-locally cmd 0 params))
+;;
+;; ;;DOT CASE6 [label="init\nremote"];
+;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
+;; ;;DOT CASE6 -> "rmt:send-receive";
+;; ;; on homehost and this is a write, we already have a server, but server has died
+;;
+;; ;; reinstate this keep-alive section but inject a time condition into the (add ...
+;; ;;
+;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost
+;; ;; (not (member cmd api:read-only-queries)) ;; this is a write
+;; ;; (remote-server-url runremote) ;; have a server
+;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
+;; ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
+;; ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
+;; ;; (set! *runremote* (make-remote))
+;; ;; (let* ((server-info (remote-server-info *runremote*)))
+;; ;; (if server-info
+;; ;; (begin
+;; ;; (remote-server-url-set! *runremote* (server:record->url server-info))
+;; ;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
+;; ;; (remote-force-server-set! runremote (common:force-server?))
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
+;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
+;;
+;; ;;DOT CASE7 [label="homehost\nwrite"];
+;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
+;; ;;DOT CASE7 -> "rmt:open-qry-close-locally";
+;; ;; on homehost and this is a write, we already have a server
+;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; (cdr (remote-hh-dat runremote)) ;; on homehost
+;; (not (member cmd api:read-only-queries)) ;; this is a write
+;; (remote-server-url runremote)) ;; have a server
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
+;; (rmt:open-qry-close-locally cmd 0 params))
+;;
+;; ;;DOT CASE8 [label="force\nserver"];
+;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
+;; ;;DOT CASE8 -> "rmt:open-qry-close-locally";
+;; ;; on homehost, no server contact made and this is a write, passively start a server
+;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; (cdr (remote-hh-dat runremote)) ;; have homehost
+;; (not (remote-server-url runremote)) ;; no connection yet
+;; (not (member cmd api:read-only-queries))) ;; not a read-only query
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
+;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
+;; (if server-info
+;; (begin
+;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
+;; (remote-server-id-set! runremote (server:record->id server-info)))
+;; (if (common:force-server?)
+;; (server:start-and-wait *toppath*)
+;; (server:kind-run *toppath*)))
+;; (remote-force-server-set! runremote (common:force-server?))
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
+;; (rmt:open-qry-close-locally cmd 0 params)))
+;;
+;; ;;DOT CASE9 [label="force server\nnot on homehost"];
+;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
+;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
+;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
+;; (not (remote-conndat runremote)))
+;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
+;; (not (remote-conndat runremote)))) ;; and no connection
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
+;; (mutex-unlock! *rmt-mutex*)
+;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
+;; (server:start-and-wait *toppath*))
+;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
+;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
+;;
+;; ;;DOT CASE10 [label="on homehost"];
+;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
+;; ;;DOT CASE10 -> "rmt:open-qry-close-locally";
+;; ;; all set up if get this far, dispatch the query
+;; ((and (not (remote-force-server runremote))
+;; (cdr (remote-hh-dat runremote))) ;; we are on homehost
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
+;; (rmt:open-qry-close-locally cmd (if rid rid 0) params))
+;;
+;; ;;DOT CASE11 [label="send_receive"];
+;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
+;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
+;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
+;; ;; not on homehost, do server query
+;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
+;; ;;DOT }
+;;
+;; ;; bunch of small functions factored out of send-receive to make debug easier
+;; ;;
+;;
+;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
+;; ;; (mutex-lock! *rmt-mutex*)
+;; (let* ((conninfo (remote-conndat runremote))
+;; (dat-in (condition-case ;; handling here has
+;; ;; caused a lot of
+;; ;; problems. However it
+;; ;; is needed to deal with
+;; ;; attemtped
+;; ;; communication to
+;; ;; servers that have gone
+;; ;; away
+;; (http-transport:client-api-send-receive 0 runremote cmd params)
+;; ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
+;; ((servermismatch) (vector #f "Server id mismatch" ))
+;; ((commfail)(vector #f "communications fail"))
+;; ((exn)(vector #f "other fail" (print-call-chain)))))
+;; (dat (if (and (vector? dat-in) ;; ... check it is a correct size
+;; (> (vector-length dat-in) 1))
+;; dat-in
+;; (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
+;; (success (if (vector? dat) (vector-ref dat 0) #f))
+;; (res (if (vector? dat) (vector-ref dat 1) #f)))
+;; (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
+;; (remote-last-access-set! runremote (current-seconds)) ;; refresh access time
+;; (begin
+;; (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
+;; (set! conninfo #f)
+;; (http-transport:close-connections runremote)))
+;; (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
+;; (mutex-unlock! *rmt-mutex*)
+;; (if success ;; success only tells us that the transport was
+;; ;; successful, have to examine the data to see if
+;; ;; there was a detected issue at the other end
+;; (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
+;; (begin
+;; (debug:print-error 0 *default-log-port* " dat=" dat)
+;; (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
+;; )))
+;;
+;; (define (rmt:print-db-stats)
+;; (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+;; (debug:print 18 *default-log-port* "DB Stats\n========")
+;; (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+;; (for-each (lambda (cmd)
+;; (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+;; (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+;; (sort (hash-table-keys *db-stats*)
+;; (lambda (a b)
+;; (> (vector-ref (hash-table-ref *db-stats* a) 0)
+;; (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+;;
+;; (define (rmt:get-max-query-average run-id)
+;; (mutex-lock! *db-stats-mutex*)
+;; (let* ((runkey (conc "run-id=" run-id " "))
+;; (cmds (filter (lambda (x)
+;; (substring-index runkey x))
+;; (hash-table-keys *db-stats*)))
+;; (res (if (null? cmds)
+;; (cons 'none 0)
+;; (let loop ((cmd (car cmds))
+;; (tal (cdr cmds))
+;; (max-cmd (car cmds))
+;; (res 0))
+;; (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+;; (tot (vector-ref cmd-dat 0))
+;; (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+;; (currmax (max res curravg))
+;; (newmax-cmd (if (> curravg res) cmd max-cmd)))
+;; (if (null? tal)
+;; (if (> tot 10)
+;; (cons newmax-cmd currmax)
+;; (cons 'none 0))
+;; (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+;; (mutex-unlock! *db-stats-mutex*)
+;; res))
+;;
+;; (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
+;; (let* ((qry-is-write (not (member cmd api:read-only-queries)))
+;; (db-file-path (db:dbfile-path)) ;; 0))
+;; (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
+;; (read-only (not (file-write-access? db-file-path)))
+;; (start (current-milliseconds))
+;; (resdat (if (not (and read-only qry-is-write))
+;; (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
+;; ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
+;; ;; exn ;; This is an attempt to detect that situation and recover gracefully
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
+;; ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
+;; (if (and (vector? v)
+;; (> (vector-length v) 1))
+;; (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
+;; newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
+;; (vector #t '()))) ;; ) ;; we could also check that the returned types are valid
+;; (vector #t '())))
+;; (success (vector-ref resdat 0))
+;; (res (vector-ref resdat 1))
+;; (duration (- (current-milliseconds) start)))
+;; (if (and read-only qry-is-write)
+;; (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
+;; (if (not success)
+;; (if (> remretries 0)
+;; (begin
+;; (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
+;; (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
+;; (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
+;; (begin
+;; (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
+;; #f))
+;; (begin
+;; ;; (rmt:update-db-stats run-id cmd params duration)
+;; ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
+;; (if qry-is-write
+;; (let ((start-time (current-seconds)))
+;; (mutex-lock! *db-multi-sync-mutex*)
+;; / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
+;; (mutex-unlock! *db-multi-sync-mutex*)))))
+;; res))
+;;
+;; (define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
+;; (let* ((run-id (if run-id run-id 0))
+;; (res (http-transport:client-api-send-receive run-id runremote cmd params)))
+;; (if (and res (vector-ref res 0))
+;; (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
+;; #f)))
+;;
+;; ;;======================================================================
+;; ;;
+;; ;; A C T U A L A P I C A L L S
+;; ;;
+;; ;;======================================================================
+;;
+;; ;;======================================================================
+;; ;; S E R V E R
+;; ;;======================================================================
+;;
+;; (define (rmt:kill-server run-id)
+;; (rmt:send-receive 'kill-server run-id (list run-id)))
+;;
+;; (define (rmt:start-server run-id)
+;; (rmt:send-receive 'start-server 0 (list run-id)))
+;;
+;; ;;======================================================================
+;; ;; M I S C
+;; ;;======================================================================
+;;
+;; (define (rmt:login run-id)
+;; (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))
+;;
+;; ;; This login does no retries under the hood - it acts a bit like a ping.
+;; ;; Deprecated for nmsg-transport.
+;; ;;
+;; (define (rmt:login-no-auto-client-setup runremote)
+;; (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
+;;
+;; ;; hand off a call to one of the db:queries statements
+;; ;; added run-id to make looking up the correct db possible
+;; ;;
+;; (define (rmt:general-call stmtname run-id . params)
+;; (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
+;;
+;;
+;; ;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
+;; (define (rmt:get-latest-host-load hostname)
+;; (rmt:send-receive 'get-latest-host-load 0 (list hostname)))
+;;
+;; (define (rmt:sdb-qry qry val run-id)
+;; ;; add caching if qry is 'getid or 'getstr
+;; (rmt:send-receive 'sdb-qry run-id (list qry val)))
+;;
+;; ;; NOT COMPLETED
+;; (define (rmt:runtests user run-id testpatt params)
+;; (rmt:send-receive 'runtests run-id testpatt))
+;;
+;; (define (rmt:get-run-record-ids target run keynames test-patt)
+;; (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))
+;;
+;; (define (rmt:get-changed-record-ids since-time)
+;; (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )
+;;
+;; (define (rmt:drop-all-triggers)
+;; (rmt:send-receive 'drop-all-triggers #f '()))
+;;
+;; (define (rmt:create-all-triggers)
+;; (rmt:send-receive 'create-all-triggers #f '()))
+;;
+;; ;;======================================================================
+;; ;; T E S T M E T A
+;; ;;======================================================================
+;;
+;; (define (rmt:get-tests-tags)
+;; (rmt:send-receive 'get-tests-tags #f '()))
+;;
+;; ;;======================================================================
+;; ;; K E Y S
+;; ;;======================================================================
+;;
+;; ;; These require run-id because the values come from the run!
+;; ;;
+;; (define (rmt:get-key-val-pairs run-id)
+;; (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
+;;
+;; (define (rmt:get-keys)
+;; (if *db-keys* *db-keys*
+;; (let ((res (rmt:send-receive 'get-keys #f '())))
+;; (set! *db-keys* res)
+;; res)))
+;;
+;; (define (rmt:get-keys-write) ;; dummy query to force server start
+;; (let ((res (rmt:send-receive 'get-keys-write #f '())))
+;; (set! *db-keys* res)
+;; res))
+;;
+;; ;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
+;; ;; to cache the resuls in a hash
+;; ;;
+;; (define (rmt:get-key-vals run-id)
+;; (or (hash-table-ref/default *keyvals* run-id #f)
+;; (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
+;; (hash-table-set! *keyvals* run-id res)
+;; res)))
+;;
+;; (define (rmt:get-targets)
+;; (rmt:send-receive 'get-targets #f '()))
+;;
+;; (define (rmt:get-target run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-target run-id (list run-id)))
+;;
+;; (define (rmt:get-run-times runpatt targetpatt)
+;; (rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
+;;
+;;
+;; ;;======================================================================
+;; ;; T E S T S
+;; ;;======================================================================
+;;
+;; ;; Just some syntatic sugar
+;; (define (rmt:register-test run-id test-name item-path)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:general-call 'register-test run-id run-id test-name item-path))
+;;
+;; (define (rmt:get-test-id run-id testname item-path)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
+;;
+;; ;; run-id is NOT used
+;; ;;
+;; (define (rmt:get-test-info-by-id run-id test-id)
+;; (if (number? test-id)
+;; (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
+;; (print-call-chain (current-error-port))
+;; #f)))
+;;
+;; (define (rmt:test-get-rundir-from-test-id run-id test-id)
+;; (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
+;;
+;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (let* ((test-path (if (string? work-area)
+;; work-area
+;; (rmt:test-get-rundir-from-test-id run-id test-id))))
+;; (debug:print 3 *default-log-port* "TEST PATH: " test-path)
+;; (open-test-db test-path)))
+;;
+;; ;; WARNING: This currently bypasses the transaction wrapped writes system
+;; (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
+;;
+;; (define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
+;;
+;; (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; ;; (if (number? run-id)
+;; (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
+;; ;; (begin
+;; ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
+;; ;; (print-call-chain (current-error-port))
+;; ;; '())))
+;;
+;; (define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
+;;
+;; ;; get stuff via synchash
+;; (define (rmt:synchash-get run-id proc synckey keynum params)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
+;;
+;; (define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
+;;
;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
;; ;;
;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
-;; (let ((run-id-list (if run-ids
+;; (let ((multi-run-mutex (make-mutex))
+;; (run-id-list (if run-ids
;; run-ids
-;; (rmt:get-all-run-ids))))
-;; (apply append (map (lambda (run-id)
-;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
-;; run-id-list))))
-
-(define (rmt:delete-test-records run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
-
-(define (rmt:test-set-state-status run-id test-id state status msg)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
-
-(define (rmt:test-toplevel-num-items run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
-
-;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
-;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
-
-(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
-
-(define (rmt:test-get-logfile-info run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
-
-(define (rmt:test-get-records-for-index-file run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
-
-(define (rmt:get-testinfo-state-status run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
-
-(define (rmt:test-set-log! run-id test-id logf)
- (assert (number? run-id) "FATAL: Run id required.")
- (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
-
-(define (rmt:test-set-top-process-pid run-id test-id pid)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
-
-(define (rmt:test-get-top-process-pid run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
-
-(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
- (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
-
-;; NOTE: This will open and access ALL run databases.
-;;
-(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
- (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
- (apply append
- (map (lambda (run-id)
- (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
- run-ids))))
-
-(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
-
-(define (rmt:get-count-tests-running-for-run-id run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
-
-(define (rmt:get-not-completed-cnt run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
-
-
-;; Statistical queries
-
-(define (rmt:get-count-tests-running run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
-
-(define (rmt:get-count-tests-running-for-testname run-id testname)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
-
-(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
-
-;; state and status are extra hints not usually used in the calculation
-;;
-(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
-
-(define (rmt:set-state-status-and-roll-up-run run-id state status)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
-
-
-(define (rmt:update-pass-fail-counts run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
-
-(define (rmt:top-test-set-per-pf-counts run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
-
-(define (rmt:get-raw-run-stats run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
-
-(define (rmt:get-test-times runname target)
- (rmt:send-receive 'get-test-times #f (list runname target )))
-
-;;======================================================================
-;; R U N S
-;;======================================================================
-
-;; BUG - LOOK AT HOW THIS WORKS!!!
-;;
-(define (rmt:get-run-info run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-info #f (list run-id)))
-
-(define (rmt:get-num-runs runpatt)
- (rmt:send-receive 'get-num-runs #f (list runpatt)))
-
-(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
- (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys)))
-
-;; Use the special run-id == #f scenario here since there is no run yet
-(define (rmt:register-run keyvals runname state status user contour)
- (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
-
-(define (rmt:get-run-name-from-id run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-name-from-id #f (list run-id)))
-
-(define (rmt:delete-run run-id)
- (rmt:send-receive 'delete-run #f (list run-id)))
-
-(define (rmt:update-run-stats run-id stats)
- (rmt:send-receive 'update-run-stats #f (list run-id stats)))
-
-(define (rmt:delete-old-deleted-test-records)
- (rmt:send-receive 'delete-old-deleted-test-records #f '()))
-
-(define (rmt:get-runs runpatt count offset keypatts)
- (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
-
-(define (rmt:simple-get-runs runpatt count offset target last-update)
- (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update)))
-
-(define (rmt:get-all-run-ids)
- (rmt:send-receive 'get-all-run-ids #f '()))
-
-(define (rmt:get-prev-run-ids run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
-
-(define (rmt:lock/unlock-run run-id lock unlock user)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
-
-;; set/get status
-(define (rmt:get-run-status run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-status #f (list run-id)))
-
-(define (rmt:get-run-state run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-state #f (list run-id)))
-
-
-(define (rmt:set-run-status run-id run-status #!key (msg #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
-
-(define (rmt:set-run-state-status run-id state status )
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-run-state-status #f (list run-id state status)))
-
-(define (rmt:update-tesdata-on-repilcate-db old-lt new-lt)
-(rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt)))
-
-(define (rmt:update-run-event_time run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'update-run-event_time #f (list run-id)))
-
-(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default
- (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
-
-(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
- (assert (number? run-id) "FATAL: Run id required.")
- ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
- (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
-
-(define (rmt:get-main-run-stats run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-main-run-stats #f (list run-id)))
-
-(define (rmt:get-var varname)
- (rmt:send-receive 'get-var #f (list varname)))
-
-(define (rmt:del-var varname)
- (rmt:send-receive 'del-var #f (list varname)))
-
-(define (rmt:set-var varname value)
- (rmt:send-receive 'set-var #f (list varname value)))
-
-(define (rmt:inc-var varname)
- (rmt:send-receive 'inc-var #f (list varname)))
-
-(define (rmt:dec-var varname)
- (rmt:send-receive 'dec-var #f (list varname)))
-
-(define (rmt:add-var varname value)
- (rmt:send-receive 'add-var #f (list varname value)))
-
-;;======================================================================
-;; M U L T I R U N Q U E R I E S
-;;======================================================================
-
-;; Need to move this to multi-run section and make associated changes
-(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
- (let ((run-ids (rmt:get-all-run-ids)))
- (for-each (lambda (run-id)
- (rmt:find-and-mark-incomplete run-id ovr-deadtime))
- run-ids)))
-
-;; get the previous record for when this test was run where all keys match but runname
-;; returns #f if no such test found, returns a single test record if found
-;;
-;; Run this at the client end since we have to connect to multiple run-id dbs
-;;
-(define (rmt:get-previous-test-run-record run-id test-name item-path)
- (let* ((keyvals (rmt:get-key-val-pairs run-id))
- (keys (rmt:get-keys))
- (selstr (string-intersperse keys ","))
- (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
- (if (not keyvals)
- #f
- (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
- ;; for each run starting with the most recent look to see if there is a matching test
- ;; if found then return that matching test record
- (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
- (if (null? prev-run-ids) #f
- (let loop ((hed (car prev-run-ids))
- (tal (cdr prev-run-ids)))
- (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
- #f #f #f ;; offset limit not-in hide/not-hide
- #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
- (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
- (if (and (null? results)
- (not (null? tal)))
- (loop (car tal)(cdr tal))
- (if (null? results) #f
- (car results))))))))))
-
-(define (rmt:get-run-stats)
- (rmt:send-receive 'get-run-stats #f '()))
-
-;;======================================================================
-;; S T E P S
-;;======================================================================
-
-;; Getting steps is more complicated.
-;;
-;; If given work area
-;; 1. Find the testdat.db file
-;; 2. Open the testdat.db file and do the query
-;; If not given the work area
-;; 1. Do a remote call to get the test path
-;; 2. Continue as above
-;;
-;;(define (rmt:get-steps-for-test run-id test-id)
-;; (rmt:send-receive 'get-steps-data run-id (list test-id)))
-
-(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
- (assert (number? run-id) "FATAL: Run id required.")
- (let* ((state (items:check-valid-items "state" state-in))
- (status (items:check-valid-items "status" status-in)))
- (if (or (not state)(not status))
- (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
- " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
- (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
-
-
-(define (rmt:delete-steps-for-test! run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))
-
-(define (rmt:get-steps-for-test run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
-
-(define (rmt:get-steps-info-by-id run-id test-step-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id)))
-
-;;======================================================================
-;; T E S T D A T A
-;;======================================================================
-
-(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
-
-(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt)))
-
-(define (rmt:get-data-info-by-id run-id test-data-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id)))
-
-(define (rmt:testmeta-add-record testname)
- (rmt:send-receive 'testmeta-add-record #f (list testname)))
-
-(define (rmt:testmeta-get-record testname)
- (rmt:send-receive 'testmeta-get-record #f (list testname)))
-
-(define (rmt:testmeta-update-field test-name fld val)
- (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
-
-(define (rmt:test-data-rollup run-id test-id status)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
-
-(define (rmt:csv->test-data run-id test-id csvdata)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
-
-;;======================================================================
-;; T A S K S
-;;======================================================================
-
-(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
- (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))
-
-(define (rmt:tasks-add action owner target runname testpatt params)
- (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
-
-(define (rmt:tasks-set-state-given-param-key param-key new-state)
- (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
-
-(define (rmt:tasks-get-last target runname)
- (rmt:send-receive 'tasks-get-last #f (list target runname)))
-
-;;======================================================================
-;; N O S Y N C D B
-;;======================================================================
-
-(define (rmt:no-sync-set var val)
- (rmt:send-receive 'no-sync-set #f `(,var ,val)))
-
-(define (rmt:no-sync-get/default var default)
- (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
-
-(define (rmt:no-sync-del! var)
- (rmt:send-receive 'no-sync-del! #f `(,var)))
-
-(define (rmt:no-sync-get-lock keyname)
- (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
-
-;;======================================================================
-;; A R C H I V E S
-;;======================================================================
-
-(define (rmt:archive-get-allocations testname itempath dneeded)
- (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
-
-(define (rmt:archive-register-block-name bdisk-id archive-path)
- (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))
-
-(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
- (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey)))
-
-(define (rmt:archive-register-disk bdisk-name bdisk-path df)
- (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
-
-(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
-
-(define (rmt:test-get-archive-block-info archive-block-id)
- (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
-
-
-(define (rmtmod:calc-ro-mode runremote *toppath*)
- (if (and runremote
- (remote-ro-mode-checked runremote))
- (remote-ro-mode runremote)
- (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
- (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
- (if runremote
- (begin
- (remote-ro-mode-set! runremote ro-mode)
- (remote-ro-mode-checked-set! runremote #t)
- ro-mode)
- ro-mode))))
-
-(define (extras-readonly-mode rmt-mutex log-port cmd params)
- (mutex-unlock! rmt-mutex)
- (debug:print-info 12 log-port "rmt:send-receive, case 3")
- (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
- #f)
-
-(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
- (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
- (mutex-lock! *rmt-mutex*)
- (http-transport:close-connections runremote)
- (remote-server-url-set! runremote #f)
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
- (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
-
-(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
- (if (and (vector? res)
- (eq? (vector-length res) 2)
- (eq? (vector-ref res 1) 'overloaded)) ;; since we are
- ;; looking at the
- ;; data to carry the
- ;; error we'll use a
- ;; fairly obtuse
- ;; combo to minimise
- ;; the chances of
- ;; some sort of
- ;; collision. this
- ;; is the case where
- ;; the returned data
- ;; is bad or the
- ;; server is
- ;; overloaded and we
- ;; want to ease off
- ;; the queries
- (let ((wait-delay (+ attemptnum (* attemptnum 10))))
- (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
- (mutex-lock! *rmt-mutex*)
- (http-transport:close-connections runremote)
- (set! *runremote* #f) ;; force starting over
- (mutex-unlock! *rmt-mutex*)
- (thread-sleep! wait-delay)
- (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
- res)) ;; All good, return res
-
-#;(set-functions rmt:send-receive remote-server-url-set!
- http-transport:close-connections remote-conndat-set!
- debug:print debug:print-info
- remote-ro-mode remote-ro-mode-set!
- remote-ro-mode-checked-set! remote-ro-mode-checked)
+;; (rmt:get-all-run-ids)))
+;; (result '()))
+;; (if (null? run-id-list)
+;; '()
+;; (let loop ((hed (car run-id-list))
+;; (tal (cdr run-id-list))
+;; (threads '()))
+;; (if (> (length threads) 5)
+;; (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
+;; (let* ((newthread (make-thread
+;; (lambda ()
+;; (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
+;; (if (list? res)
+;; (begin
+;; (mutex-lock! multi-run-mutex)
+;; (set! result (append result res))
+;; (mutex-unlock! multi-run-mutex))
+;; (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
+;; (conc "multi-run-thread for run-id " hed)))
+;; (newthreads (cons newthread threads)))
+;; (thread-start! newthread)
+;; (thread-sleep! 0.05) ;; give that thread some time to start
+;; (if (null? tal)
+;; newthreads
+;; (loop (car tal)(cdr tal) newthreads))))))
+;; result))
+;;
+;; ;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
+;; ;; ;;
+;; ;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
+;; ;; (let ((run-id-list (if run-ids
+;; ;; run-ids
+;; ;; (rmt:get-all-run-ids))))
+;; ;; (apply append (map (lambda (run-id)
+;; ;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
+;; ;; run-id-list))))
+;;
+;; (define (rmt:delete-test-records run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
+;;
+;; (define (rmt:test-set-state-status run-id test-id state status msg)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
+;;
+;; (define (rmt:test-toplevel-num-items run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
+;;
+;; ;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
+;; ;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
+;;
+;; (define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
+;;
+;; (define (rmt:test-get-logfile-info run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
+;;
+;; (define (rmt:test-get-records-for-index-file run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
+;;
+;; (define (rmt:get-testinfo-state-status run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
+;;
+;; (define (rmt:test-set-log! run-id test-id logf)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
+;;
+;; (define (rmt:test-set-top-process-pid run-id test-id pid)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
+;;
+;; (define (rmt:test-get-top-process-pid run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
+;;
+;; (define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
+;; (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
+;;
+;; ;; NOTE: This will open and access ALL run databases.
+;; ;;
+;; (define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
+;; (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
+;; (apply append
+;; (map (lambda (run-id)
+;; (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
+;; run-ids))))
+;;
+;; (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
+;;
+;; (define (rmt:get-count-tests-running-for-run-id run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
+;;
+;; (define (rmt:get-not-completed-cnt run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
+;;
+;;
+;; ;; Statistical queries
+;;
+;; (define (rmt:get-count-tests-running run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
+;;
+;; (define (rmt:get-count-tests-running-for-testname run-id testname)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
+;;
+;; (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
+;;
+;; ;; state and status are extra hints not usually used in the calculation
+;; ;;
+;; (define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
+;;
+;; (define (rmt:set-state-status-and-roll-up-run run-id state status)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
+;;
+;;
+;; (define (rmt:update-pass-fail-counts run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
+;;
+;; (define (rmt:top-test-set-per-pf-counts run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
+;;
+;; (define (rmt:get-raw-run-stats run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
+;;
+;; (define (rmt:get-test-times runname target)
+;; (rmt:send-receive 'get-test-times #f (list runname target )))
+;;
+;; ;;======================================================================
+;; ;; R U N S
+;; ;;======================================================================
+;;
+;; ;; BUG - LOOK AT HOW THIS WORKS!!!
+;; ;;
+;; (define (rmt:get-run-info run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-info #f (list run-id)))
+;;
+;; (define (rmt:get-num-runs runpatt)
+;; (rmt:send-receive 'get-num-runs #f (list runpatt)))
+;;
+;; (define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
+;; (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys)))
+;;
+;; ;; Use the special run-id == #f scenario here since there is no run yet
+;; (define (rmt:register-run keyvals runname state status user contour)
+;; (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
+;;
+;; (define (rmt:get-run-name-from-id run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-name-from-id #f (list run-id)))
+;;
+;; (define (rmt:delete-run run-id)
+;; (rmt:send-receive 'delete-run #f (list run-id)))
+;;
+;; (define (rmt:update-run-stats run-id stats)
+;; (rmt:send-receive 'update-run-stats #f (list run-id stats)))
+;;
+;; (define (rmt:delete-old-deleted-test-records)
+;; (rmt:send-receive 'delete-old-deleted-test-records #f '()))
+;;
+;; (define (rmt:get-runs runpatt count offset keypatts)
+;; (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
+;;
+;; (define (rmt:simple-get-runs runpatt count offset target last-update)
+;; (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update)))
+;;
+;; (define (rmt:get-all-run-ids)
+;; (rmt:send-receive 'get-all-run-ids #f '()))
+;;
+;; (define (rmt:get-prev-run-ids run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
+;;
+;; (define (rmt:lock/unlock-run run-id lock unlock user)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
+;;
+;; ;; set/get status
+;; (define (rmt:get-run-status run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-status #f (list run-id)))
+;;
+;; (define (rmt:get-run-state run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-state #f (list run-id)))
+;;
+;;
+;; (define (rmt:set-run-status run-id run-status #!key (msg #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
+;;
+;; (define (rmt:set-run-state-status run-id state status )
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-run-state-status #f (list run-id state status)))
+;;
+;; (define (rmt:update-tesdata-on-repilcate-db old-lt new-lt)
+;; (rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt)))
+;;
+;; (define (rmt:update-run-event_time run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'update-run-event_time #f (list run-id)))
+;;
+;; (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default
+;; (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
+;;
+;; (define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
+;; (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
+;;
+;; (define (rmt:get-main-run-stats run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-main-run-stats #f (list run-id)))
+;;
+;; (define (rmt:get-var varname)
+;; (rmt:send-receive 'get-var #f (list varname)))
+;;
+;; (define (rmt:del-var varname)
+;; (rmt:send-receive 'del-var #f (list varname)))
+;;
+;; (define (rmt:set-var varname value)
+;; (rmt:send-receive 'set-var #f (list varname value)))
+;;
+;; (define (rmt:inc-var varname)
+;; (rmt:send-receive 'inc-var #f (list varname)))
+;;
+;; (define (rmt:dec-var varname)
+;; (rmt:send-receive 'dec-var #f (list varname)))
+;;
+;; (define (rmt:add-var varname value)
+;; (rmt:send-receive 'add-var #f (list varname value)))
+;;
+;; ;;======================================================================
+;; ;; M U L T I R U N Q U E R I E S
+;; ;;======================================================================
+;;
+;; ;; Need to move this to multi-run section and make associated changes
+;; (define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
+;; (let ((run-ids (rmt:get-all-run-ids)))
+;; (for-each (lambda (run-id)
+;; (rmt:find-and-mark-incomplete run-id ovr-deadtime))
+;; run-ids)))
+;;
+;; ;; get the previous record for when this test was run where all keys match but runname
+;; ;; returns #f if no such test found, returns a single test record if found
+;; ;;
+;; ;; Run this at the client end since we have to connect to multiple run-id dbs
+;; ;;
+;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
+;; (let* ((keyvals (rmt:get-key-val-pairs run-id))
+;; (keys (rmt:get-keys))
+;; (selstr (string-intersperse keys ","))
+;; (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
+;; (if (not keyvals)
+;; #f
+;; (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
+;; ;; for each run starting with the most recent look to see if there is a matching test
+;; ;; if found then return that matching test record
+;; (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
+;; (if (null? prev-run-ids) #f
+;; (let loop ((hed (car prev-run-ids))
+;; (tal (cdr prev-run-ids)))
+;; (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
+;; #f #f #f ;; offset limit not-in hide/not-hide
+;; #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
+;; (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
+;; (if (and (null? results)
+;; (not (null? tal)))
+;; (loop (car tal)(cdr tal))
+;; (if (null? results) #f
+;; (car results))))))))))
+;;
+;; (define (rmt:get-run-stats)
+;; (rmt:send-receive 'get-run-stats #f '()))
+;;
+;; ;;======================================================================
+;; ;; S T E P S
+;; ;;======================================================================
+;;
+;; ;; Getting steps is more complicated.
+;; ;;
+;; ;; If given work area
+;; ;; 1. Find the testdat.db file
+;; ;; 2. Open the testdat.db file and do the query
+;; ;; If not given the work area
+;; ;; 1. Do a remote call to get the test path
+;; ;; 2. Continue as above
+;; ;;
+;; ;;(define (rmt:get-steps-for-test run-id test-id)
+;; ;; (rmt:send-receive 'get-steps-data run-id (list test-id)))
+;;
+;; (define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (let* ((state (items:check-valid-items "state" state-in))
+;; (status (items:check-valid-items "status" status-in)))
+;; (if (or (not state)(not status))
+;; (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
+;; " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
+;; (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
+;;
+;;
+;; (define (rmt:delete-steps-for-test! run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))
+;;
+;; (define (rmt:get-steps-for-test run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
+;;
+;; (define (rmt:get-steps-info-by-id run-id test-step-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id)))
+;;
+;; ;;======================================================================
+;; ;; T E S T D A T A
+;; ;;======================================================================
+;;
+;; (define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
+;;
+;; (define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt)))
+;;
+;; (define (rmt:get-data-info-by-id run-id test-data-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id)))
+;;
+;; (define (rmt:testmeta-add-record testname)
+;; (rmt:send-receive 'testmeta-add-record #f (list testname)))
+;;
+;; (define (rmt:testmeta-get-record testname)
+;; (rmt:send-receive 'testmeta-get-record #f (list testname)))
+;;
+;; (define (rmt:testmeta-update-field test-name fld val)
+;; (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
+;;
+;; (define (rmt:test-data-rollup run-id test-id status)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
+;;
+;; (define (rmt:csv->test-data run-id test-id csvdata)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
+;;
+;; ;;======================================================================
+;; ;; T A S K S
+;; ;;======================================================================
+;;
+;; (define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
+;; (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))
+;;
+;; (define (rmt:tasks-add action owner target runname testpatt params)
+;; (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
+;;
+;; (define (rmt:tasks-set-state-given-param-key param-key new-state)
+;; (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
+;;
+;; (define (rmt:tasks-get-last target runname)
+;; (rmt:send-receive 'tasks-get-last #f (list target runname)))
+;;
+;; ;;======================================================================
+;; ;; N O S Y N C D B
+;; ;;======================================================================
+;;
+;; (define (rmt:no-sync-set var val)
+;; (rmt:send-receive 'no-sync-set #f `(,var ,val)))
+;;
+;; (define (rmt:no-sync-get/default var default)
+;; (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
+;;
+;; (define (rmt:no-sync-del! var)
+;; (rmt:send-receive 'no-sync-del! #f `(,var)))
+;;
+;; (define (rmt:no-sync-get-lock keyname)
+;; (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
+;;
+;; ;;======================================================================
+;; ;; A R C H I V E S
+;; ;;======================================================================
+;;
+;; (define (rmt:archive-get-allocations testname itempath dneeded)
+;; (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
+;;
+;; (define (rmt:archive-register-block-name bdisk-id archive-path)
+;; (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))
+;;
+;; (define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
+;; (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey)))
+;;
+;; (define (rmt:archive-register-disk bdisk-name bdisk-path df)
+;; (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
+;;
+;; (define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
+;;
+;; (define (rmt:test-get-archive-block-info archive-block-id)
+;; (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
+;;
+;;
+;; (define (rmtmod:calc-ro-mode runremote *toppath*)
+;; (if (and runremote
+;; (remote-ro-mode-checked runremote))
+;; (remote-ro-mode runremote)
+;; (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
+;; (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+;; (if runremote
+;; (begin
+;; (remote-ro-mode-set! runremote ro-mode)
+;; (remote-ro-mode-checked-set! runremote #t)
+;; ro-mode)
+;; ro-mode))))
+;;
+;; (define (extras-readonly-mode rmt-mutex log-port cmd params)
+;; (mutex-unlock! rmt-mutex)
+;; (debug:print-info 12 log-port "rmt:send-receive, case 3")
+;; (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
+;; #f)
+;;
+;; (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
+;; (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
+;; (mutex-lock! *rmt-mutex*)
+;; (http-transport:close-connections runremote)
+;; (remote-server-url-set! runremote #f)
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
+;; (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
+;;
+;; (define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
+;; (if (and (vector? res)
+;; (eq? (vector-length res) 2)
+;; (eq? (vector-ref res 1) 'overloaded)) ;; since we are
+;; ;; looking at the
+;; ;; data to carry the
+;; ;; error we'll use a
+;; ;; fairly obtuse
+;; ;; combo to minimise
+;; ;; the chances of
+;; ;; some sort of
+;; ;; collision. this
+;; ;; is the case where
+;; ;; the returned data
+;; ;; is bad or the
+;; ;; server is
+;; ;; overloaded and we
+;; ;; want to ease off
+;; ;; the queries
+;; (let ((wait-delay (+ attemptnum (* attemptnum 10))))
+;; (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
+;; (mutex-lock! *rmt-mutex*)
+;; (http-transport:close-connections runremote)
+;; (set! *runremote* #f) ;; force starting over
+;; (mutex-unlock! *rmt-mutex*)
+;; (thread-sleep! wait-delay)
+;; (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
+;; res)) ;; All good, return res
+;;
+;; #;(set-functions rmt:send-receive remote-server-url-set!
+;; http-transport:close-connections remote-conndat-set!
+;; debug:print debug:print-info
+;; remote-ro-mode remote-ro-mode-set!
+;; remote-ro-mode-checked-set! remote-ro-mode-checked)
+;;
Index: rmtmod.scm
==================================================================
--- rmtmod.scm
+++ rmtmod.scm
@@ -18,68 +18,10 @@
;;======================================================================
(declare (unit rmtmod))
(declare (uses commonmod))
-(declare (uses apimod))
-;; (declare (uses apimod.import))
-(declare (uses ulex))
-
-;; (include "ulex/ulex.scm")
(module rmtmod
*
-(import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18)
-(import (prefix commonmod cmod:))
-(import apimod)
-(import (prefix ulex ulex:))
-
-(defstruct alldat
- (areapath #f)
- (ulexdat #f)
- )
-
-;;======================================================================
-;; return the handle struct for sending queries to a specific database
-;; - initializes the connection object if this is the first access
-;; - finds the "captain" and asks who to talk to for the given dbfname
-;; - establishes the connection to the current dbowner
-;;
-#;(define (rmt:connect alldat dbfname dbtype)
- (let* ((ulexdat (or (alldat-ulexdat alldat)
- (rmt:setup-ulex alldat))))
- (ulex:connect ulexdat dbfname dbtype)))
-
-;; setup the remote calls
-#;(define (rmt:setup-ulex alldat)
- (let* ((udata (ulex:setup))) ;; establish connection to ulex
- (alldat-ulexdat-set! alldat udata)
- ;; register all needed procs
- (ulex:register-handler udata 'ping cmod:get-full-version) ;; override ping with get-full-version
- (ulex:register-handler udata 'login cmod:get-full-version) ;; force setup of the connection
- (ulex:register-handler udata 'execute api:execute-requests)
- udata))
-
-;; set up a connection to the current owner of the dbfile associated with rid
-;; then send the query to that dbfile owner and wait for a response.
-;;
-#;(define (rmt:send-receive alldat cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
- (let* (;; (alldat *alldat*)
- (areapath (alldat-areapath alldat))
- (dbtype (if (or (not rid)(< rid 1)) ;; this is the criteria for "main.db"
- "main" "runs"))
- (dbfname (if (equal? dbtype "main")
- "main.db"
- (conc rid ".db")))
- (dbfile (conc areapath "/.db/" dbfname))
- (ulexconn (rmt:connect alldat dbfname dbtype)) ;; ulexconn is our new *runremote*, it is a dbowner struct < pdat lastrefresh >
- (udata (alldat-ulexdat alldat)))
- (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid params)))
- ;; need to call this on the other side
- ;; (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
-
- #;(with-input-from-string
- (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid (with-output-to-string (lambda ()(serialize params))))
- (lambda ()(deserialize)))
)
Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -25,11 +25,11 @@
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
-(declare (uses server))
+(declare (uses servermod))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))
(include "common_records.scm")
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -14,857 +14,858 @@
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
-(require-extension (srfi 18) extras tcp s11n)
-
-(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
- directory-utils posix-extras matchable utils)
-
-(use spiffy uri-common intarweb http-client spiffy-request-vars)
-
-(declare (unit server))
-
-(declare (uses commonmod))
-
-(declare (uses common))
-(declare (uses db))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-;; (declare (uses synchash))
-(declare (uses http-transport))
-;;(declare (uses rpc-transport))
-(declare (uses launch))
-;; (declare (uses daemon))
-
-(import commonmod)
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-(define (server:make-server-url hostport)
- (if (not hostport)
- #f
- (conc "http://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-;; Call this to start the actual server
-;;
-
-;;======================================================================
-;; S E R V E R U T I L I T I E S
-;;======================================================================
-
-;; Get the transport
-(define (server:get-transport)
- (if *transport-type*
- *transport-type*
- (let ((ttype (string->symbol
- (or (args:get-arg "-transport")
- (configf:lookup *configdat* "server" "transport")
- "rpc"))))
- (set! *transport-type* ttype)
- ttype)))
-
-;; Generate a unique signature for this server
-(define (server:mk-signature)
- (message-digest-string (md5-primitive)
- (with-output-to-string
- (lambda ()
- (write (list (current-directory)
- (current-process-id)
- (argv)))))))
-
-(define (server:get-client-signature)
- (if *my-client-signature* *my-client-signature*
- (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
-(define (server:get-server-id)
- (if *server-id* *server-id*
- (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
- (set! *server-id* sig)
- *server-id*)))
-
-;; When using zmq this would send the message back (two step process)
-;; with spiffy or rpc this simply returns the return data to be returned
-;;
-(define (server:reply return-addr query-sig success/fail result)
- (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
- ;; (send-message pubsock target send-more: #t)
- ;; (send-message pubsock
- (case (server:get-transport)
- ((rpc) (db:obj->string (vector success/fail query-sig result)))
- ((http) (db:obj->string (vector success/fail query-sig result)))
- ((fs) result)
- (else
- (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
- result)))
-
-;; Given an area path, start a server process ### NOTE ### > file 2>&1
-;; if the target-host is set
-;; try running on that host
-;; incidental: rotate logs in logs/ dir.
-;;
-(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
- (let* ((testsuite (common:get-testsuite-name))
- (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
- (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
- ""))
- (cmdln (conc (common:get-megatest-exe)
- " -server - ";; (or target-host "-")
- (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
- " -daemonize "
- "")
- ;; " -log " logfile
- " -m testsuite:" testsuite
- " " profile-mode
- )) ;; (conc " >> " logfile " 2>&1 &")))))
- (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
- (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
- ;; we want the remote server to start in *toppath* so push there
- (push-directory areapath)
- (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
- (thread-start! log-rotate)
-
- ;; host.domain.tld match host?
- ;; (if (and target-host
- ;; ;; look at target host, is it host.domain.tld or ip address and does it
- ;; ;; match current ip or hostname
- ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
- ;; (not (equal? curr-ip target-host)))
- ;; (begin
- ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
- ;; (setenv "TARGETHOST" target-host)))
- ;;
- (setenv "TARGETHOST_LOGF" logfile)
- (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
- (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
- (system (conc "nbfake " cmdln))
- (unsetenv "TARGETHOST_LOGF")
- ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
- (thread-join! log-rotate)
- (pop-directory)))
-
-;; given a path to a server log return: host port startseconds server-id
-;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
-;; example of what it's looking for in the log file:
-;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
-
-(define (server:logf-get-start-info logf)
- (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
- (dbprep-rx (regexp "^SERVER: dbprep"))
- (dbprep-found 0)
- (bad-dat (list #f #f #f #f #f)))
- (handle-exceptions
- exn
- (begin
- ;; WARNING: this is potentially dangerous to blanket ignore the errors
- (if (file-exists? logf)
- (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
- bad-dat) ;; no idea what went wrong, call it a bad server
- (with-input-from-file
- logf
- (lambda ()
- (let loop ((inl (read-line))
- (lnum 0))
- (if (not (eof-object? inl))
- (let ((mlst (string-match server-rx inl))
- (dbprep (string-match dbprep-rx inl)))
- (if dbprep (set! dbprep-found 1))
- (if (not mlst)
- (if (< lnum 500) ;; give up if more than 500 lines of server log read
- (loop (read-line)(+ lnum 1))
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
- bad-dat))
- (match mlst
- ((_ host port start server-id pid)
- (list host
- (string->number port)
- (string->number start)
- server-id
- (string->number pid)))
- (else
- (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
- bad-dat))))
- (begin
- (if dbprep-found
- (begin
- (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
- (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
- (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
- bad-dat))))))))
-
-;; ;; get a list of servers from the log files, with all relevant data
-;; ;; ( mod-time host port start-time pid )
-;; ;;
-;; (define (server:get-list areapath #!key (limit #f))
-;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
-;; (day-seconds (* 24 60 60)))
-;; ;; if the directory exists continue to get the list
-;; ;; otherwise attempt to create the logs dir and then
-;; ;; continue
-;; (if (if (directory-exists? (conc areapath "/logs"))
-;; '()
-;; (if (file-write-access? areapath)
-;; (begin
-;; (condition-case
-;; (create-directory (conc areapath "/logs") #t)
-;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
-;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
-;; (directory-exists? (conc areapath "/logs")))
-;; '()))
-;;
-;; ;; Get the list of server logs.
-;; (let* (
-;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
-;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
-;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
-;; (num-serv-logs (length server-logs)))
-;; (if (or (null? server-logs) (= num-serv-logs 0))
-;; (let ()
-;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
-;; '()
-;; )
-;; (let loop ((hed (string-chomp (car server-logs)))
-;; (tal (cdr server-logs))
-;; (res '()))
-;; (let* ((mod-time (handle-exceptions
-;; exn
-;; (begin
-;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
-;; (current-seconds)) ;; 0
-;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
-;; (down-time (- (current-seconds) mod-time))
-;; (serv-dat (if (or (< num-serv-logs 10)
-;; (< down-time 900)) ;; day-seconds))
-;; (server:logf-get-start-info hed)
-;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
-;; (serv-rec (cons mod-time serv-dat))
-;; (fmatch (string-match fname-rx hed))
-;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
-;; (new-res (if (null? serv-dat)
-;; res
-;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
-;; (if (null? tal)
-;; (if (and limit
-;; (> (length new-res) limit))
-;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
-;; new-res)
-;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
-
-#;(define (server:get-num-alive srvlst)
- (let ((num-alive 0))
- (for-each
- (lambda (server)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
- (match-let (((mod-time host port start-time server-id pid)
- server))
- (let* ((uptime (- (current-seconds) mod-time))
- (runtime (if start-time
- (- mod-time start-time)
- 0)))
- (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
- srvlst)
- num-alive))
-
-;; ;; given a list of servers get a list of valid servers, i.e. at least
-;; ;; 10 seconds old, has started and is less than 1 hour old and is
-;; ;; active (i.e. mod-time < 10 seconds
-;; ;;
-;; ;; mod-time host port start-time pid
-;; ;;
-;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
-;; ;; and servers should stick around for about two hours or so.
-;; ;;
-;; (define (server:get-best srvlst)
-;; (let* ((nums (server:get-num-servers))
-;; (now (current-seconds))
-;; (slst (sort
-;; (filter (lambda (rec)
-;; (if (and (list? rec)
-;; (> (length rec) 2))
-;; (let ((start-time (list-ref rec 3))
-;; (mod-time (list-ref rec 0)))
-;; ;; (print "start-time: " start-time " mod-time: " mod-time)
-;; (and start-time mod-time
-;; (> (- now start-time) 0) ;; been running at least 0 seconds
-;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
-;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
-;; (< (- now start-time)
-;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
-;; 180)
-;; (random 360)))) ;; under one hour running time +/- 180
-;; ))
-;; #f))
-;; srvlst)
-;; (lambda (a b)
-;; (< (list-ref a 3)
-;; (list-ref b 3))))))
-;; (if (> (length slst) nums)
-;; (take slst nums)
-;; slst)))
-
-;; ;; switch from server:get-list to server:get-servers-info
-;; ;;
-;; (define (server:get-first-best areapath)
-;; (let ((srvrs (server:get-best (server:get-list areapath))))
-;; (if (and srvrs
-;; (not (null? srvrs)))
-;; (car srvrs)
-;; #f)))
-;;
-;; (define (server:get-rand-best areapath)
-;; (let ((srvrs (server:get-best (server:get-list areapath))))
-;; (if (and (list? srvrs)
-;; (not (null? srvrs)))
-;; (let* ((len (length srvrs))
-;; (idx (random len)))
-;; (list-ref srvrs idx))
-;; #f)))
-
-(define (server:record->id servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
- #f)
- (match-let (((host port start-time server-id pid)
- servr))
- (if server-id
- server-id
- #f))))
-
-(define (server:record->url servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
- #f)
- (match-let (((host port start-time server-id pid)
- servr))
- (if (and host port)
- (conc host ":" port)
- #f))))
-
-
-;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
-;; if it is old enough, overwrite it and wait 0.25 seconds.
-;; if it then has the wrong server key, wait + 1 and call this function recursively.
-;;
-#;(define (server:wait-for-server-start-last-flag areapath)
- (let* ((start-flag (conc areapath "/logs/server-start-last"))
- ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
- (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
- (server-key (conc (get-host-name) "-" (current-process-id))))
- (if (file-exists? start-flag)
- (let* ((fmodtime (file-modification-time start-flag))
- (delta (- (current-seconds) fmodtime))
- (old-enough (> delta idletime))
- (new-server-key ""))
- ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t.
- ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
- (if (and old-enough
- (begin
- (debug:print-info 2 *default-log-port* "Writing " start-flag)
- (with-output-to-file start-flag (lambda () (print server-key)))
- (thread-sleep! 0.25)
- (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
- (equal? server-key new-server-key)))
- #t
- ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively.
- (begin
- (debug:print-info 0 *default-log-port* "Gating server start, last start: "
- (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
-
- (thread-sleep! ( + 1 idletime))
- (server:wait-for-server-start-last-flag areapath)))))))
-
-;; oldest server alive determines host then choose random of youngest
-;; five servers on that host
-;;
-(define (server:get-servers-info areapath)
- ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
- (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
- (if (not (file-exists? servinfodir))
- (create-directory servinfodir))
- (let* ((allfiles (glob (conc servinfodir"/*")))
- (res (make-hash-table)))
- (for-each
- (lambda (f)
- (let* ((hostport (pathname-strip-directory f))
- (serverdat (server:logf-get-start-info f)))
- (match serverdat
- ((host port start server-id pid)
- (if (and host port start server-id pid)
- (hash-table-set! res hostport serverdat)
- (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
- (else
- (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
- allfiles)
- res)))
-
-;; check the .servinfo directory, are there other servers running on this
-;; or another host?
-;;
-;; returns #t => ok to start another server
-;; #f => not ok to start another server
-;;
-(define (server:minimal-check areapath)
- (server:clean-up-old areapath)
- (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
- (servrs (glob (conc srvdir"/*")))
- (thishostip (server:get-best-guess-address (get-host-name)))
- (thisservrs (glob (conc srvdir"/"thishostip":*")))
- (homehostinf (server:choose-server areapath 'homehost))
- (havehome (car homehostinf))
- (wearehome (cdr homehostinf)))
- (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
- ", numservers: "(length thisservrs))
- (cond
- ((not havehome) #t) ;; no homehost yet, go for it
- ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
- ((and havehome (not wearehome)) #f) ;; we are not the home host
- ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
- (else
- (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
- #t))))
-
-
-(define server-last-start 0)
-
-
-;; oldest server alive determines host then choose random of youngest
-;; five servers on that host
-;;
-;; mode:
-;; best - get best server (random of newest five)
-;; home - get home host based on oldest server
-;; info - print info
-(define (server:choose-server areapath #!optional (mode 'best))
- ;; age is current-starttime
- ;; find oldest alive
- ;; 1. sort by age ascending and ping until good
- ;; find alive rand from youngest
- ;; 1. sort by age descending
- ;; 2. take five
- ;; 3. check alive, discard if not and repeat
- ;; first we clean up old server files
- (server:clean-up-old areapath)
- (let* ((since-last (- (current-seconds) server-last-start))
- (server-start-delay 10))
- (if ( < (- (current-seconds) server-last-start) 10 )
- (begin
- (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
- (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
- (thread-sleep! server-start-delay)
- )
- (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
- )
- )
- (let* ((serversdat (server:get-servers-info areapath))
- (servkeys (hash-table-keys serversdat))
- (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
- (sort servkeys ;; list of "host:port"
- (lambda (a b)
- (>= (list-ref (hash-table-ref serversdat a) 2)
- (list-ref (hash-table-ref serversdat b) 2))))
- '())))
- (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
- (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
- (if (not (null? by-time-asc))
- (let* ((oldest (last by-time-asc))
- (oldest-dat (hash-table-ref serversdat oldest))
- (host (list-ref oldest-dat 0))
- (all-valid (filter (lambda (x)
- (equal? host (list-ref (hash-table-ref serversdat x) 0)))
- by-time-asc))
- (best-ten (lambda ()
- (if (> (length all-valid) 11)
- (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
- (if (> (length all-valid) 8)
- (drop-right all-valid 1)
- all-valid))))
- (names->dats (lambda (names)
- (map (lambda (x)
- (hash-table-ref serversdat x))
- names)))
- (am-home? (lambda ()
- (let* ((currhost (get-host-name))
- (bestadrs (server:get-best-guess-address currhost)))
- (or (equal? host currhost)
- (equal? host bestadrs))))))
- (case mode
- ((info)
- (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
- (print "youngest: "(hash-table-ref serversdat (car all-valid))))
- ((home) host)
- ((homehost) (cons host (am-home?))) ;; shut up old code
- ((home?) (am-home?))
- ((best-ten)(names->dats (best-ten)))
- ((all-valid)(names->dats all-valid))
- ((best) (let* ((best-ten (best-ten))
- (len (length best-ten)))
- (hash-table-ref serversdat (list-ref best-ten (random len)))))
- ((count)(length all-valid))
- (else
- (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
- #f)))
- (begin
- (server:run areapath)
- (set! server-last-start (current-seconds))
- ;; (thread-sleep! 3)
- (case mode
- ((homehost) (cons #f #f))
- (else #f))))))
-
-(define (server:get-servinfo-dir areapath)
- (let* ((spath (conc areapath"/.servinfo")))
- (if (not (file-exists? spath))
- (create-directory spath #t))
- spath))
-
-(define (server:clean-up-old areapath)
- ;; any server file that has not been touched in ten minutes is effectively dead
- (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
- (for-each
- (lambda (sfile)
- (let* ((modtime (handle-exceptions
- exn
- (begin
- (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
- (current-seconds))
- (file-modification-time sfile))))
- (if (and (number? modtime)
- (> (- (current-seconds) modtime)
- 600))
- (begin
- (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
- (delete-file sfile))))))
- sfiles)))
-
-;; would like to eventually get rid of this
-;;
-(define (common:on-homehost?)
- (server:choose-server *toppath* 'home?))
-
-;; kind start up of server, wait before allowing another server for a given
-;; area to be launched
-;;
-(define (server:kind-run areapath)
- ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
- ;; and wait for it to be at least seconds old
- ;; (server:wait-for-server-start-last-flag areapath)
- (let loop ()
- (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
- (begin
- (if (common:low-noise-print 30 "our-host-load")
- (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
- (loop))))
- (if (< (server:choose-server areapath 'count) 20)
- (server:run areapath))
- #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
- (let* ((lock-file (conc areapath "/logs/server-start.lock")))
- (let* ((start-flag (conc areapath "/logs/server-start-last")))
- (common:simple-file-lock-and-wait lock-file expire-time: 25)
- (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
- (system (conc "touch " start-flag)) ;; lazy but safe
- (server:run areapath)
- (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
- (common:simple-file-release-lock lock-file)))
- (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
-
-;; this one seems to be the general entry point
-;;
-(define (server:start-and-wait areapath #!key (timeout 60))
- (let ((give-up-time (+ (current-seconds) timeout)))
- (let loop ((server-info (server:check-if-running areapath))
- (try-num 0))
- (if (or server-info
- (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
- (server:record->url server-info)
- (let* ( (servers (server:choose-server areapath 'all-valid))
- (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
- (if (and (> try-num 0) ;; first time through simply wait a little while then try again
- (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
- (server:run areapath))
- (thread-sleep! 5)
- (loop (server:check-if-running areapath)
- (+ try-num 1)))))))
-
-(define (server:get-num-servers #!key (numservers 2))
- (let ((ns (string->number
- (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
- (or ns numservers)))
-
-;; no longer care if multiple servers are started by accident. older servers will drop off in time.
-;;
-(define (server:check-if-running areapath) ;; #!key (numservers "2"))
- (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
- (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
- (if (or (and servers
- (null? servers))
- (not servers))
- ;; (and (list? servers)
- ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
- #f
- (let loop ((hed (car servers))
- (tal (cdr servers)))
- (let ((res (server:check-server hed)))
- (if res
- hed
- (if (null? tal)
- #f
- (loop (car tal)(cdr tal)))))))))
-
-;; ping the given server
-;;
-(define (server:check-server server-record)
- (let* ((server-url (server:record->url server-record))
- (server-id (server:record->id server-record))
- (res (server:ping server-url server-id)))
- (if res
- server-url
- #f)))
-
-(define (server:kill servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
- #f)
- (match-let (((mod-time hostname port start-time server-id pid)
- servr))
- (tasks:kill-server hostname pid))))
-
-;; called in megatest.scm, host-port is string hostname:port
-;;
-;; NOTE: This is NOT called directly from clients as not all transports support a client running
-;; in the same process as the server.
-;;
-(define (server:ping host:port server-id #!key (do-exit #f))
- (let* ((host-port (cond
- ((string? host:port)
- (let ((slst (string-split host:port ":")))
- (if (eq? (length slst) 2)
- (list (car slst)(string->number (cadr slst)))
- #f)))
- (else
- #f))))
- (cond
- ((and (list? host-port)
- (eq? (length host-port) 2))
- (let* ((myrunremote (make-remote))
- (iface (car host-port))
- (port (cadr host-port))
- (server-dat (client:connect iface port server-id myrunremote))
- (login-res (rmt:login-no-auto-client-setup myrunremote)))
- (if (and (list? login-res)
- (car login-res))
- (begin
- ;; (print "LOGIN_OK")
- (if do-exit (exit 0))
- #t)
- (begin
- ;; (print "LOGIN_FAILED")
- (if do-exit (exit 1))
- #f))))
- (else
- (if host:port
- (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
- (if do-exit
- (exit 1)
- #f)))))
-
-;; run ping in separate process, safest way in some cases
-;;
-(define (server:ping-server ifaceport)
- (with-input-from-pipe
- (conc (common:get-megatest-exe) " -ping " ifaceport)
- (lambda ()
- (let loop ((inl (read-line))
- (res "NOREPLY"))
- (if (eof-object? inl)
- (case (string->symbol res)
- ((NOREPLY) #f)
- ((LOGIN_OK) #t)
- (else #f))
- (loop (read-line) inl))))))
-
-;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
-;;
-(define (server:login toppath)
- (lambda (toppath)
- (set! *db-last-access* (current-seconds)) ;; might not be needed.
- (if (equal? *toppath* toppath)
- #t
- #f)))
-
-;; timeout is hms string: 1h 5m 3s, default is 1 minute
-;; This is currently broken. Just use the number of hours with no unit.
-;; Default is 60 seconds.
-;;
-(define (server:expiration-timeout)
- (let ((tmo (configf:lookup *configdat* "server" "timeout")))
- (if (and (string? tmo)
- (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
- (* 3600 (string->number tmo))
- 600)))
-
-(define (server:get-best-guess-address hostname)
- (let ((res #f))
- (for-each
- (lambda (adr)
- (if (not (eq? (u8vector-ref adr 0) 127))
- (set! res adr)))
- ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
- (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
- (string-intersperse
- (map number->string
- (u8vector->list
- (if res res (hostname->ip hostname)))) ".")))
-
-;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
-;; (define (server:release-sync-lock)
-;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
-;; (define (server:have-sync-lock?)
-;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
-;; (have-lock? (car have-lock-pair))
-;; (lock-time (cdr have-lock-pair))
-;; (lock-age (- (current-seconds) lock-time)))
-;; (cond
-;; (have-lock? #t)
-;; ((>lock-age
-;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
-;; (server:release-sync-lock)
-;; (server:have-sync-lock?))
-;; (else #f))))
-
-;; moving this here as it needs access to db and cannot be in common.
-;;
-
-(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
- (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
- (lambda ()
- (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))
- #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
- (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
- (tmp-area (common:get-db-tmp-area))
- (tmp-db (conc tmp-area "/megatest.db"))
- (staging-file (conc *toppath* "/.megatest.db"))
- (mtdbfile (conc *toppath* "/megatest.db"))
- (lockfile (common:get-sync-lock-filepath))
- (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
- (sync-cmd (if fork-to-background
- (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
- sync-cmd-core))
- (default-min-intersync-delay 2)
- (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
- (default-duty-cycle 0.1)
- (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
- (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
- (calculate-off-time (lambda (work-duration duty-cycle)
- (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
- (off-time min-intersync-delay) ;; adjusted in closure below.
- (do-a-sync
- (lambda ()
- (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
- (let* ((finalres
- (let retry-loop ((num-tries 0))
- (if (common:simple-file-lock lockfile)
- (begin
- (cond
- ((not (or fork-to-background persist-until-sync))
- (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
- " , off-time="off-time" seconds ]")
- (thread-sleep! (max off-time min-intersync-delay)))
- (else
- (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
-
- (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
- (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
- (delete-file* staging-file)
- (let* ((start-time (current-milliseconds))
- (res (system sync-cmd))
- (dbbackupfile (conc mtdbfile ".backup"))
- (res2
- (cond
- ((eq? 0 res )
- (handle-exceptions
- exn
- #f
- (if (file-exists? dbbackupfile)
- (delete-file* dbbackupfile)
- )
- (if (eq? 0 (file-size sync-log))
- (delete-file* sync-log))
- (system (conc "/bin/mv " staging-file " " mtdbfile))
-
- (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
- (set! off-time (calculate-off-time
- last-sync-seconds
- (cond
- ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
- duty-cycle)
- (else
- (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
- default-duty-cycle))))
-
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
- 'sync-completed))
- (else
- (system (conc "/bin/cp "sync-log" "sync-log".fail"))
- (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
- (if (file-exists? (conc mtdbfile ".backup"))
- (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
- #f))))
- (common:simple-file-release-lock lockfile)
- (BB> "released lockfile: " lockfile)
- (when (common:file-exists? lockfile)
- (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
- res2) ;; end let
- );; end begin
- ;; else
- (cond
- (persist-until-sync
- (thread-sleep! 1)
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
- (retry-loop (add1 num-tries)))
- (else
- (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
- 'parallel-sync-in-progress))
- ) ;; end if got lockfile
- )
- ))
- (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
- finalres)
- ) ;; end lambda
- ))
- do-a-sync))
-
+;; (require-extension (srfi 18) extras tcp s11n)
+;;
+;; (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
+;; directory-utils posix-extras matchable utils)
+;;
+;; (use spiffy uri-common intarweb http-client spiffy-request-vars)
+;;
+;; (declare (unit server))
+;;
+;; (declare (uses commonmod))
+;;
+;; (declare (uses common))
+;; (declare (uses db))
+;; (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
+;; ;; (declare (uses synchash))
+;; (declare (uses http-transport))
+;; ;;(declare (uses rpc-transport))
+;; (declare (uses launch))
+;; ;; (declare (uses daemon))
+;;
+;; (import commonmod)
+;;
+;; (include "common_records.scm")
+;; (include "db_records.scm")
+;;
+;; (define (server:make-server-url hostport)
+;; (if (not hostport)
+;; #f
+;; (conc "http://" (car hostport) ":" (cadr hostport))))
+;;
+;; (define *server-loop-heart-beat* (current-seconds))
+;;
+;; ;;======================================================================
+;; ;; P K T S S T U F F
+;; ;;======================================================================
+;;
+;; ;; ???
+;;
+;; ;;======================================================================
+;; ;; P K T S S T U F F
+;; ;;======================================================================
+;;
+;; ;; ???
+;;
+;; ;;======================================================================
+;; ;; S E R V E R
+;; ;;======================================================================
+;;
+;; ;; Call this to start the actual server
+;; ;;
+;;
+;; ;;======================================================================
+;; ;; S E R V E R U T I L I T I E S
+;; ;;======================================================================
+;;
+;; ;; Get the transport
+;; (define (server:get-transport)
+;; (if *transport-type*
+;; *transport-type*
+;; (let ((ttype (string->symbol
+;; (or (args:get-arg "-transport")
+;; (configf:lookup *configdat* "server" "transport")
+;; "rpc"))))
+;; (set! *transport-type* ttype)
+;; ttype)))
+;;
+;; ;; Generate a unique signature for this server
+;; (define (server:mk-signature)
+;; (message-digest-string (md5-primitive)
+;; (with-output-to-string
+;; (lambda ()
+;; (write (list (current-directory)
+;; (current-process-id)
+;; (argv)))))))
+;;
+;; (define (server:get-client-signature)
+;; (if *my-client-signature* *my-client-signature*
+;; (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
+;; (set! *my-client-signature* sig)
+;; *my-client-signature*)))
+;;
+;; (define (server:get-server-id)
+;; (if *server-id* *server-id*
+;; (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
+;; (set! *server-id* sig)
+;; *server-id*)))
+;;
+;; ;; When using zmq this would send the message back (two step process)
+;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;;
+;; (define (server:reply return-addr query-sig success/fail result)
+;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;; ;; (send-message pubsock target send-more: #t)
+;; ;; (send-message pubsock
+;; (case (server:get-transport)
+;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
+;; ((http) (db:obj->string (vector success/fail query-sig result)))
+;; ((fs) result)
+;; (else
+;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;; result)))
+;;
+;; ;; Given an area path, start a server process ### NOTE ### > file 2>&1
+;; ;; if the target-host is set
+;; ;; try running on that host
+;; ;; incidental: rotate logs in logs/ dir.
+;; ;;
+;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+;; (let* ((testsuite (common:get-testsuite-name))
+;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+;; (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
+;; ""))
+;; (cmdln (conc (common:get-megatest-exe)
+;; " -server - ";; (or target-host "-")
+;; (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+;; " -daemonize "
+;; "")
+;; ;; " -log " logfile
+;; " -m testsuite:" testsuite
+;; " " profile-mode
+;; )) ;; (conc " >> " logfile " 2>&1 &")))))
+;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
+;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+;; ;; we want the remote server to start in *toppath* so push there
+;; (push-directory areapath)
+;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+;; (thread-start! log-rotate)
+;;
+;; ;; host.domain.tld match host?
+;; ;; (if (and target-host
+;; ;; ;; look at target host, is it host.domain.tld or ip address and does it
+;; ;; ;; match current ip or hostname
+;; ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+;; ;; (not (equal? curr-ip target-host)))
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+;; ;; (setenv "TARGETHOST" target-host)))
+;; ;;
+;; (setenv "TARGETHOST_LOGF" logfile)
+;; (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
+;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
+;; (system (conc "nbfake " cmdln))
+;; (unsetenv "TARGETHOST_LOGF")
+;; ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+;; (thread-join! log-rotate)
+;; (pop-directory)))
+;;
+;; ;; given a path to a server log return: host port startseconds server-id
+;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
+;; ;; example of what it's looking for in the log file:
+;; ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
+;;
+;; (define (server:logf-get-start-info logf)
+;; (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
+;; (dbprep-rx (regexp "^SERVER: dbprep"))
+;; (dbprep-found 0)
+;; (bad-dat (list #f #f #f #f #f)))
+;; (handle-exceptions
+;; exn
+;; (begin
+;; ;; WARNING: this is potentially dangerous to blanket ignore the errors
+;; (if (file-exists? logf)
+;; (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
+;; bad-dat) ;; no idea what went wrong, call it a bad server
+;; (with-input-from-file
+;; logf
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (lnum 0))
+;; (if (not (eof-object? inl))
+;; (let ((mlst (string-match server-rx inl))
+;; (dbprep (string-match dbprep-rx inl)))
+;; (if dbprep (set! dbprep-found 1))
+;; (if (not mlst)
+;; (if (< lnum 500) ;; give up if more than 500 lines of server log read
+;; (loop (read-line)(+ lnum 1))
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
+;; bad-dat))
+;; (match mlst
+;; ((_ host port start server-id pid)
+;; (list host
+;; (string->number port)
+;; (string->number start)
+;; server-id
+;; (string->number pid)))
+;; (else
+;; (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
+;; bad-dat))))
+;; (begin
+;; (if dbprep-found
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
+;; (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
+;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
+;; bad-dat))))))))
+;;
+;; ;; ;; get a list of servers from the log files, with all relevant data
+;; ;; ;; ( mod-time host port start-time pid )
+;; ;; ;;
+;; ;; (define (server:get-list areapath #!key (limit #f))
+;; ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
+;; ;; (day-seconds (* 24 60 60)))
+;; ;; ;; if the directory exists continue to get the list
+;; ;; ;; otherwise attempt to create the logs dir and then
+;; ;; ;; continue
+;; ;; (if (if (directory-exists? (conc areapath "/logs"))
+;; ;; '()
+;; ;; (if (file-write-access? areapath)
+;; ;; (begin
+;; ;; (condition-case
+;; ;; (create-directory (conc areapath "/logs") #t)
+;; ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
+;; ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
+;; ;; (directory-exists? (conc areapath "/logs")))
+;; ;; '()))
+;; ;;
+;; ;; ;; Get the list of server logs.
+;; ;; (let* (
+;; ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
+;; ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
+;; ;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
+;; ;; (num-serv-logs (length server-logs)))
+;; ;; (if (or (null? server-logs) (= num-serv-logs 0))
+;; ;; (let ()
+;; ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
+;; ;; '()
+;; ;; )
+;; ;; (let loop ((hed (string-chomp (car server-logs)))
+;; ;; (tal (cdr server-logs))
+;; ;; (res '()))
+;; ;; (let* ((mod-time (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
+;; ;; (current-seconds)) ;; 0
+;; ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
+;; ;; (down-time (- (current-seconds) mod-time))
+;; ;; (serv-dat (if (or (< num-serv-logs 10)
+;; ;; (< down-time 900)) ;; day-seconds))
+;; ;; (server:logf-get-start-info hed)
+;; ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
+;; ;; (serv-rec (cons mod-time serv-dat))
+;; ;; (fmatch (string-match fname-rx hed))
+;; ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
+;; ;; (new-res (if (null? serv-dat)
+;; ;; res
+;; ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
+;; ;; (if (null? tal)
+;; ;; (if (and limit
+;; ;; (> (length new-res) limit))
+;; ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
+;; ;; new-res)
+;; ;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
+;;
+;; #;(define (server:get-num-alive srvlst)
+;; (let ((num-alive 0))
+;; (for-each
+;; (lambda (server)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
+;; (match-let (((mod-time host port start-time server-id pid)
+;; server))
+;; (let* ((uptime (- (current-seconds) mod-time))
+;; (runtime (if start-time
+;; (- mod-time start-time)
+;; 0)))
+;; (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
+;; srvlst)
+;; num-alive))
+;;
+;; ;; ;; given a list of servers get a list of valid servers, i.e. at least
+;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is
+;; ;; ;; active (i.e. mod-time < 10 seconds
+;; ;; ;;
+;; ;; ;; mod-time host port start-time pid
+;; ;; ;;
+;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
+;; ;; ;; and servers should stick around for about two hours or so.
+;; ;; ;;
+;; ;; (define (server:get-best srvlst)
+;; ;; (let* ((nums (server:get-num-servers))
+;; ;; (now (current-seconds))
+;; ;; (slst (sort
+;; ;; (filter (lambda (rec)
+;; ;; (if (and (list? rec)
+;; ;; (> (length rec) 2))
+;; ;; (let ((start-time (list-ref rec 3))
+;; ;; (mod-time (list-ref rec 0)))
+;; ;; ;; (print "start-time: " start-time " mod-time: " mod-time)
+;; ;; (and start-time mod-time
+;; ;; (> (- now start-time) 0) ;; been running at least 0 seconds
+;; ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
+;; ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
+;; ;; (< (- now start-time)
+;; ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
+;; ;; 180)
+;; ;; (random 360)))) ;; under one hour running time +/- 180
+;; ;; ))
+;; ;; #f))
+;; ;; srvlst)
+;; ;; (lambda (a b)
+;; ;; (< (list-ref a 3)
+;; ;; (list-ref b 3))))))
+;; ;; (if (> (length slst) nums)
+;; ;; (take slst nums)
+;; ;; slst)))
+;;
+;; ;; ;; switch from server:get-list to server:get-servers-info
+;; ;; ;;
+;; ;; (define (server:get-first-best areapath)
+;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; (if (and srvrs
+;; ;; (not (null? srvrs)))
+;; ;; (car srvrs)
+;; ;; #f)))
+;; ;;
+;; ;; (define (server:get-rand-best areapath)
+;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; (if (and (list? srvrs)
+;; ;; (not (null? srvrs)))
+;; ;; (let* ((len (length srvrs))
+;; ;; (idx (random len)))
+;; ;; (list-ref srvrs idx))
+;; ;; #f)))
+;;
+;; (define (server:record->id servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((host port start-time server-id pid)
+;; servr))
+;; (if server-id
+;; server-id
+;; #f))))
+;;
+;; (define (server:record->url servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((host port start-time server-id pid)
+;; servr))
+;; (if (and host port)
+;; (conc host ":" port)
+;; #f))))
+;;
+;;
+;; ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
+;; ;; if it is old enough, overwrite it and wait 0.25 seconds.
+;; ;; if it then has the wrong server key, wait + 1 and call this function recursively.
+;; ;;
+;; #;(define (server:wait-for-server-start-last-flag areapath)
+;; (let* ((start-flag (conc areapath "/logs/server-start-last"))
+;; ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
+;; (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
+;; (server-key (conc (get-host-name) "-" (current-process-id))))
+;; (if (file-exists? start-flag)
+;; (let* ((fmodtime (file-modification-time start-flag))
+;; (delta (- (current-seconds) fmodtime))
+;; (old-enough (> delta idletime))
+;; (new-server-key ""))
+;; ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t.
+;; ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
+;; (if (and old-enough
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Writing " start-flag)
+;; (with-output-to-file start-flag (lambda () (print server-key)))
+;; (thread-sleep! 0.25)
+;; (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
+;; (equal? server-key new-server-key)))
+;; #t
+;; ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively.
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Gating server start, last start: "
+;; (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
+;;
+;; (thread-sleep! ( + 1 idletime))
+;; (server:wait-for-server-start-last-flag areapath)))))))
+;;
+;; ;; oldest server alive determines host then choose random of youngest
+;; ;; five servers on that host
+;; ;;
+;; (define (server:get-servers-info areapath)
+;; ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
+;; (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
+;; (if (not (file-exists? servinfodir))
+;; (create-directory servinfodir))
+;; (let* ((allfiles (glob (conc servinfodir"/*")))
+;; (res (make-hash-table)))
+;; (for-each
+;; (lambda (f)
+;; (let* ((hostport (pathname-strip-directory f))
+;; (serverdat (server:logf-get-start-info f)))
+;; (match serverdat
+;; ((host port start server-id pid)
+;; (if (and host port start server-id pid)
+;; (hash-table-set! res hostport serverdat)
+;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
+;; (else
+;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
+;; allfiles)
+;; res)))
+;;
+;; ;; check the .servinfo directory, are there other servers running on this
+;; ;; or another host?
+;; ;;
+;; ;; returns #t => ok to start another server
+;; ;; #f => not ok to start another server
+;; ;;
+;; (define (server:minimal-check areapath)
+;; (server:clean-up-old areapath)
+;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
+;; (servrs (glob (conc srvdir"/*")))
+;; (thishostip (server:get-best-guess-address (get-host-name)))
+;; (thisservrs (glob (conc srvdir"/"thishostip":*")))
+;; (homehostinf (server:choose-server areapath 'homehost))
+;; (havehome (car homehostinf))
+;; (wearehome (cdr homehostinf)))
+;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
+;; ", numservers: "(length thisservrs))
+;; (cond
+;; ((not havehome) #t) ;; no homehost yet, go for it
+;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
+;; ((and havehome (not wearehome)) #f) ;; we are not the home host
+;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
+;; (else
+;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
+;; #t))))
+;;
+;;
+;; (define server-last-start 0)
+;;
+;;
+;; ;; oldest server alive determines host then choose random of youngest
+;; ;; five servers on that host
+;; ;;
+;; ;; mode:
+;; ;; best - get best server (random of newest five)
+;; ;; home - get home host based on oldest server
+;; ;; info - print info
+;; (define (server:choose-server areapath #!optional (mode 'best))
+;; ;; age is current-starttime
+;; ;; find oldest alive
+;; ;; 1. sort by age ascending and ping until good
+;; ;; find alive rand from youngest
+;; ;; 1. sort by age descending
+;; ;; 2. take five
+;; ;; 3. check alive, discard if not and repeat
+;; ;; first we clean up old server files
+;; (server:clean-up-old areapath)
+;; (let* ((since-last (- (current-seconds) server-last-start))
+;; (server-start-delay 10))
+;; (if ( < (- (current-seconds) server-last-start) 10 )
+;; (begin
+;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
+;; (thread-sleep! server-start-delay)
+;; )
+;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; )
+;; )
+;; (let* ((serversdat (server:get-servers-info areapath))
+;; (servkeys (hash-table-keys serversdat))
+;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
+;; (sort servkeys ;; list of "host:port"
+;; (lambda (a b)
+;; (>= (list-ref (hash-table-ref serversdat a) 2)
+;; (list-ref (hash-table-ref serversdat b) 2))))
+;; '())))
+;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
+;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
+;; (if (not (null? by-time-asc))
+;; (let* ((oldest (last by-time-asc))
+;; (oldest-dat (hash-table-ref serversdat oldest))
+;; (host (list-ref oldest-dat 0))
+;; (all-valid (filter (lambda (x)
+;; (equal? host (list-ref (hash-table-ref serversdat x) 0)))
+;; by-time-asc))
+;; (best-ten (lambda ()
+;; (if (> (length all-valid) 11)
+;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
+;; (if (> (length all-valid) 8)
+;; (drop-right all-valid 1)
+;; all-valid))))
+;; (names->dats (lambda (names)
+;; (map (lambda (x)
+;; (hash-table-ref serversdat x))
+;; names)))
+;; (am-home? (lambda ()
+;; (let* ((currhost (get-host-name))
+;; (bestadrs (server:get-best-guess-address currhost)))
+;; (or (equal? host currhost)
+;; (equal? host bestadrs))))))
+;; (case mode
+;; ((info)
+;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+;; (print "youngest: "(hash-table-ref serversdat (car all-valid))))
+;; ((home) host)
+;; ((homehost) (cons host (am-home?))) ;; shut up old code
+;; ((home?) (am-home?))
+;; ((best-ten)(names->dats (best-ten)))
+;; ((all-valid)(names->dats all-valid))
+;; ((best) (let* ((best-ten (best-ten))
+;; (len (length best-ten)))
+;; (hash-table-ref serversdat (list-ref best-ten (random len)))))
+;; ((count)(length all-valid))
+;; (else
+;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
+;; #f)))
+;; (begin
+;; (server:run areapath)
+;; (set! server-last-start (current-seconds))
+;; ;; (thread-sleep! 3)
+;; (case mode
+;; ((homehost) (cons #f #f))
+;; (else #f))))))
+;;
+;; (define (server:get-servinfo-dir areapath)
+;; (let* ((spath (conc areapath"/.servinfo")))
+;; (if (not (file-exists? spath))
+;; (create-directory spath #t))
+;; spath))
+;;
+;; (define (server:clean-up-old areapath)
+;; ;; any server file that has not been touched in ten minutes is effectively dead
+;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
+;; (for-each
+;; (lambda (sfile)
+;; (let* ((modtime (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
+;; (current-seconds))
+;; (file-modification-time sfile))))
+;; (if (and (number? modtime)
+;; (> (- (current-seconds) modtime)
+;; 600))
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
+;; (delete-file sfile))))))
+;; sfiles)))
+;;
+;; ;; would like to eventually get rid of this
+;; ;;
+;; (define (common:on-homehost?)
+;; (server:choose-server *toppath* 'home?))
+;;
+;; ;; kind start up of server, wait before allowing another server for a given
+;; ;; area to be launched
+;; ;;
+;; (define (server:kind-run areapath)
+;; ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
+;; ;; and wait for it to be at least seconds old
+;; ;; (server:wait-for-server-start-last-flag areapath)
+;; (let loop ()
+;; (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
+;; (begin
+;; (if (common:low-noise-print 30 "our-host-load")
+;; (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
+;; (loop))))
+;; (if (< (server:choose-server areapath 'count) 20)
+;; (server:run areapath))
+;; #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+;; (let* ((lock-file (conc areapath "/logs/server-start.lock")))
+;; (let* ((start-flag (conc areapath "/logs/server-start-last")))
+;; (common:simple-file-lock-and-wait lock-file expire-time: 25)
+;; (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
+;; (system (conc "touch " start-flag)) ;; lazy but safe
+;; (server:run areapath)
+;; (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
+;; (common:simple-file-release-lock lock-file)))
+;; (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
+;;
+;; ;; this one seems to be the general entry point
+;; ;;
+;; (define (server:start-and-wait areapath #!key (timeout 60))
+;; (let ((give-up-time (+ (current-seconds) timeout)))
+;; (let loop ((server-info (server:check-if-running areapath))
+;; (try-num 0))
+;; (if (or server-info
+;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
+;; (server:record->url server-info)
+;; (let* ( (servers (server:choose-server areapath 'all-valid))
+;; (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
+;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again
+;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
+;; (server:run areapath))
+;; (thread-sleep! 5)
+;; (loop (server:check-if-running areapath)
+;; (+ try-num 1)))))))
+;;
+;; (define (server:get-num-servers #!key (numservers 2))
+;; (let ((ns (string->number
+;; (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
+;; (or ns numservers)))
+;;
+;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;; ;;
+;; (define (server:check-if-running areapath) ;; #!key (numservers "2"))
+;; (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
+;; (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
+;; (if (or (and servers
+;; (null? servers))
+;; (not servers))
+;; ;; (and (list? servers)
+;; ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
+;; #f
+;; (let loop ((hed (car servers))
+;; (tal (cdr servers)))
+;; (let ((res (server:check-server hed)))
+;; (if res
+;; hed
+;; (if (null? tal)
+;; #f
+;; (loop (car tal)(cdr tal)))))))))
+;;
+;; ;; ping the given server
+;; ;;
+;; (define (server:check-server server-record)
+;; (let* ((server-url (server:record->url server-record))
+;; (server-id (server:record->id server-record))
+;; (res (server:ping server-url server-id)))
+;; (if res
+;; server-url
+;; #f)))
+;;
+;; (define (server:kill servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((mod-time hostname port start-time server-id pid)
+;; servr))
+;; (tasks:kill-server hostname pid))))
+;;
+;; ;; called in megatest.scm, host-port is string hostname:port
+;; ;;
+;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;; ;; in the same process as the server.
+;; ;;
+;; (define (server:ping host:port server-id #!key (do-exit #f))
+;; (let* ((host-port (cond
+;; ((string? host:port)
+;; (let ((slst (string-split host:port ":")))
+;; (if (eq? (length slst) 2)
+;; (list (car slst)(string->number (cadr slst)))
+;; #f)))
+;; (else
+;; #f))))
+;; (cond
+;; ((and (list? host-port)
+;; (eq? (length host-port) 2))
+;; (let* ((myrunremote (make-remote))
+;; (iface (car host-port))
+;; (port (cadr host-port))
+;; (server-dat (client:connect iface port server-id myrunremote))
+;; (login-res (rmt:login-no-auto-client-setup myrunremote)))
+;; (if (and (list? login-res)
+;; (car login-res))
+;; (begin
+;; ;; (print "LOGIN_OK")
+;; (if do-exit (exit 0))
+;; #t)
+;; (begin
+;; ;; (print "LOGIN_FAILED")
+;; (if do-exit (exit 1))
+;; #f))))
+;; (else
+;; (if host:port
+;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
+;; (if do-exit
+;; (exit 1)
+;; #f)))))
+;;
+;; ;; run ping in separate process, safest way in some cases
+;; ;;
+;; (define (server:ping-server ifaceport)
+;; (with-input-from-pipe
+;; (conc (common:get-megatest-exe) " -ping " ifaceport)
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (res "NOREPLY"))
+;; (if (eof-object? inl)
+;; (case (string->symbol res)
+;; ((NOREPLY) #f)
+;; ((LOGIN_OK) #t)
+;; (else #f))
+;; (loop (read-line) inl))))))
+;;
+;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;; ;;
+;; (define (server:login toppath)
+;; (lambda (toppath)
+;; (set! *db-last-access* (current-seconds)) ;; might not be needed.
+;; (if (equal? *toppath* toppath)
+;; #t
+;; #f)))
+;;
+;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute
+;; ;; This is currently broken. Just use the number of hours with no unit.
+;; ;; Default is 60 seconds.
+;; ;;
+;; (define (server:expiration-timeout)
+;; (let ((tmo (configf:lookup *configdat* "server" "timeout")))
+;; (if (and (string? tmo)
+;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
+;; (* 3600 (string->number tmo))
+;; 600)))
+;;
+;; (define (server:get-best-guess-address hostname)
+;; (let ((res #f))
+;; (for-each
+;; (lambda (adr)
+;; (if (not (eq? (u8vector-ref adr 0) 127))
+;; (set! res adr)))
+;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+;; (string-intersperse
+;; (map number->string
+;; (u8vector->list
+;; (if res res (hostname->ip hostname)))) ".")))
+;;
+;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; ;; (define (server:release-sync-lock)
+;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; ;; (define (server:have-sync-lock?)
+;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;; ;; (have-lock? (car have-lock-pair))
+;; ;; (lock-time (cdr have-lock-pair))
+;; ;; (lock-age (- (current-seconds) lock-time)))
+;; ;; (cond
+;; ;; (have-lock? #t)
+;; ;; ((>lock-age
+;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;; ;; (server:release-sync-lock)
+;; ;; (server:have-sync-lock?))
+;; ;; (else #f))))
+;;
+;; ;; moving this here as it needs access to db and cannot be in common.
+;; ;;
+;;
+;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
+;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
+;; (lambda ()
+;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))
+;; #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
+;; (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
+;; (tmp-area (common:get-db-tmp-area))
+;; (tmp-db (conc tmp-area "/megatest.db"))
+;; (staging-file (conc *toppath* "/.megatest.db"))
+;; (mtdbfile (conc *toppath* "/megatest.db"))
+;; (lockfile (common:get-sync-lock-filepath))
+;; (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
+;; (sync-cmd (if fork-to-background
+;; (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
+;; sync-cmd-core))
+;; (default-min-intersync-delay 2)
+;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
+;; (default-duty-cycle 0.1)
+;; (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
+;; (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
+;; (calculate-off-time (lambda (work-duration duty-cycle)
+;; (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
+;; (off-time min-intersync-delay) ;; adjusted in closure below.
+;; (do-a-sync
+;; (lambda ()
+;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
+;; (let* ((finalres
+;; (let retry-loop ((num-tries 0))
+;; (if (common:simple-file-lock lockfile)
+;; (begin
+;; (cond
+;; ((not (or fork-to-background persist-until-sync))
+;; (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
+;; " , off-time="off-time" seconds ]")
+;; (thread-sleep! (max off-time min-intersync-delay)))
+;; (else
+;; (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
+;;
+;; (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
+;; (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
+;; (delete-file* staging-file)
+;; (let* ((start-time (current-milliseconds))
+;; (res (system sync-cmd))
+;; (dbbackupfile (conc mtdbfile ".backup"))
+;; (res2
+;; (cond
+;; ((eq? 0 res )
+;; (handle-exceptions
+;; exn
+;; #f
+;; (if (file-exists? dbbackupfile)
+;; (delete-file* dbbackupfile)
+;; )
+;; (if (eq? 0 (file-size sync-log))
+;; (delete-file* sync-log))
+;; (system (conc "/bin/mv " staging-file " " mtdbfile))
+;;
+;; (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
+;; (set! off-time (calculate-off-time
+;; last-sync-seconds
+;; (cond
+;; ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
+;; duty-cycle)
+;; (else
+;; (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
+;; default-duty-cycle))))
+;;
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
+;; 'sync-completed))
+;; (else
+;; (system (conc "/bin/cp "sync-log" "sync-log".fail"))
+;; (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
+;; (if (file-exists? (conc mtdbfile ".backup"))
+;; (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
+;; #f))))
+;; (common:simple-file-release-lock lockfile)
+;; (BB> "released lockfile: " lockfile)
+;; (when (common:file-exists? lockfile)
+;; (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
+;; res2) ;; end let
+;; );; end begin
+;; ;; else
+;; (cond
+;; (persist-until-sync
+;; (thread-sleep! 1)
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
+;; (retry-loop (add1 num-tries)))
+;; (else
+;; (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
+;; 'parallel-sync-in-progress))
+;; ) ;; end if got lockfile
+;; )
+;; ))
+;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
+;; finalres)
+;; ) ;; end lambda
+;; ))
+;; do-a-sync))
+;;
+;;
ADDED servermod.scm
Index: servermod.scm
==================================================================
--- /dev/null
+++ servermod.scm
@@ -0,0 +1,821 @@
+;; Copyright 2006-2017, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+;;
+
+(declare (unit servermod))
+
+(module servermod
+*
+
+(import scheme
+ chicken
+
+ md5
+ message-digest
+ ports
+ posix
+ )
+
+(define *client-server-id* #f)
+
+;;======================================================================
+;; S E R V E R U T I L I T I E S
+;;======================================================================
+
+;; Generate a unique signature for this server
+(define (mk-signature)
+ (message-digest-string (md5-primitive)
+ (with-output-to-string
+ (lambda ()
+ (write (list (current-directory)
+ (current-process-id)
+ (argv)))))))
+
+(define (get-client-server-id)
+ (if *client-server-id* *client-server-id*
+ (let ((sig (mk-signature))) ;; clients re-use the server:mk-signature logic
+ (set! *client-server-id* sig)
+ *client-server-id*)))
+
+;; ;; When using zmq this would send the message back (two step process)
+;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;;
+;; (define (server:reply return-addr query-sig success/fail result)
+;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;; ;; (send-message pubsock target send-more: #t)
+;; ;; (send-message pubsock
+;; (case (server:get-transport)
+;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
+;; ((http) (db:obj->string (vector success/fail query-sig result)))
+;; ((fs) result)
+;; (else
+;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;; result)))
+;;
+;; ;; Given an area path, start a server process ### NOTE ### > file 2>&1
+;; ;; if the target-host is set
+;; ;; try running on that host
+;; ;; incidental: rotate logs in logs/ dir.
+;; ;;
+;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+;; (let* ((testsuite (common:get-testsuite-name))
+;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+;; (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
+;; ""))
+;; (cmdln (conc (common:get-megatest-exe)
+;; " -server - ";; (or target-host "-")
+;; (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+;; " -daemonize "
+;; "")
+;; ;; " -log " logfile
+;; " -m testsuite:" testsuite
+;; " " profile-mode
+;; )) ;; (conc " >> " logfile " 2>&1 &")))))
+;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
+;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+;; ;; we want the remote server to start in *toppath* so push there
+;; (push-directory areapath)
+;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+;; (thread-start! log-rotate)
+;;
+;; ;; host.domain.tld match host?
+;; ;; (if (and target-host
+;; ;; ;; look at target host, is it host.domain.tld or ip address and does it
+;; ;; ;; match current ip or hostname
+;; ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+;; ;; (not (equal? curr-ip target-host)))
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+;; ;; (setenv "TARGETHOST" target-host)))
+;; ;;
+;; (setenv "TARGETHOST_LOGF" logfile)
+;; (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
+;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
+;; (system (conc "nbfake " cmdln))
+;; (unsetenv "TARGETHOST_LOGF")
+;; ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+;; (thread-join! log-rotate)
+;; (pop-directory)))
+;;
+;; ;; given a path to a server log return: host port startseconds server-id
+;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
+;; ;; example of what it's looking for in the log file:
+;; ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
+;;
+;; (define (server:logf-get-start-info logf)
+;; (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
+;; (dbprep-rx (regexp "^SERVER: dbprep"))
+;; (dbprep-found 0)
+;; (bad-dat (list #f #f #f #f #f)))
+;; (handle-exceptions
+;; exn
+;; (begin
+;; ;; WARNING: this is potentially dangerous to blanket ignore the errors
+;; (if (file-exists? logf)
+;; (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
+;; bad-dat) ;; no idea what went wrong, call it a bad server
+;; (with-input-from-file
+;; logf
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (lnum 0))
+;; (if (not (eof-object? inl))
+;; (let ((mlst (string-match server-rx inl))
+;; (dbprep (string-match dbprep-rx inl)))
+;; (if dbprep (set! dbprep-found 1))
+;; (if (not mlst)
+;; (if (< lnum 500) ;; give up if more than 500 lines of server log read
+;; (loop (read-line)(+ lnum 1))
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
+;; bad-dat))
+;; (match mlst
+;; ((_ host port start server-id pid)
+;; (list host
+;; (string->number port)
+;; (string->number start)
+;; server-id
+;; (string->number pid)))
+;; (else
+;; (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
+;; bad-dat))))
+;; (begin
+;; (if dbprep-found
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
+;; (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
+;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
+;; bad-dat))))))))
+;;
+;; ;; ;; get a list of servers from the log files, with all relevant data
+;; ;; ;; ( mod-time host port start-time pid )
+;; ;; ;;
+;; ;; (define (server:get-list areapath #!key (limit #f))
+;; ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
+;; ;; (day-seconds (* 24 60 60)))
+;; ;; ;; if the directory exists continue to get the list
+;; ;; ;; otherwise attempt to create the logs dir and then
+;; ;; ;; continue
+;; ;; (if (if (directory-exists? (conc areapath "/logs"))
+;; ;; '()
+;; ;; (if (file-write-access? areapath)
+;; ;; (begin
+;; ;; (condition-case
+;; ;; (create-directory (conc areapath "/logs") #t)
+;; ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
+;; ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
+;; ;; (directory-exists? (conc areapath "/logs")))
+;; ;; '()))
+;; ;;
+;; ;; ;; Get the list of server logs.
+;; ;; (let* (
+;; ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
+;; ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
+;; ;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
+;; ;; (num-serv-logs (length server-logs)))
+;; ;; (if (or (null? server-logs) (= num-serv-logs 0))
+;; ;; (let ()
+;; ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
+;; ;; '()
+;; ;; )
+;; ;; (let loop ((hed (string-chomp (car server-logs)))
+;; ;; (tal (cdr server-logs))
+;; ;; (res '()))
+;; ;; (let* ((mod-time (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
+;; ;; (current-seconds)) ;; 0
+;; ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
+;; ;; (down-time (- (current-seconds) mod-time))
+;; ;; (serv-dat (if (or (< num-serv-logs 10)
+;; ;; (< down-time 900)) ;; day-seconds))
+;; ;; (server:logf-get-start-info hed)
+;; ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
+;; ;; (serv-rec (cons mod-time serv-dat))
+;; ;; (fmatch (string-match fname-rx hed))
+;; ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
+;; ;; (new-res (if (null? serv-dat)
+;; ;; res
+;; ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
+;; ;; (if (null? tal)
+;; ;; (if (and limit
+;; ;; (> (length new-res) limit))
+;; ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
+;; ;; new-res)
+;; ;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
+;;
+;; #;(define (server:get-num-alive srvlst)
+;; (let ((num-alive 0))
+;; (for-each
+;; (lambda (server)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
+;; (match-let (((mod-time host port start-time server-id pid)
+;; server))
+;; (let* ((uptime (- (current-seconds) mod-time))
+;; (runtime (if start-time
+;; (- mod-time start-time)
+;; 0)))
+;; (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
+;; srvlst)
+;; num-alive))
+;;
+;; ;; ;; given a list of servers get a list of valid servers, i.e. at least
+;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is
+;; ;; ;; active (i.e. mod-time < 10 seconds
+;; ;; ;;
+;; ;; ;; mod-time host port start-time pid
+;; ;; ;;
+;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
+;; ;; ;; and servers should stick around for about two hours or so.
+;; ;; ;;
+;; ;; (define (server:get-best srvlst)
+;; ;; (let* ((nums (server:get-num-servers))
+;; ;; (now (current-seconds))
+;; ;; (slst (sort
+;; ;; (filter (lambda (rec)
+;; ;; (if (and (list? rec)
+;; ;; (> (length rec) 2))
+;; ;; (let ((start-time (list-ref rec 3))
+;; ;; (mod-time (list-ref rec 0)))
+;; ;; ;; (print "start-time: " start-time " mod-time: " mod-time)
+;; ;; (and start-time mod-time
+;; ;; (> (- now start-time) 0) ;; been running at least 0 seconds
+;; ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
+;; ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
+;; ;; (< (- now start-time)
+;; ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
+;; ;; 180)
+;; ;; (random 360)))) ;; under one hour running time +/- 180
+;; ;; ))
+;; ;; #f))
+;; ;; srvlst)
+;; ;; (lambda (a b)
+;; ;; (< (list-ref a 3)
+;; ;; (list-ref b 3))))))
+;; ;; (if (> (length slst) nums)
+;; ;; (take slst nums)
+;; ;; slst)))
+;;
+;; ;; ;; switch from server:get-list to server:get-servers-info
+;; ;; ;;
+;; ;; (define (server:get-first-best areapath)
+;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; (if (and srvrs
+;; ;; (not (null? srvrs)))
+;; ;; (car srvrs)
+;; ;; #f)))
+;; ;;
+;; ;; (define (server:get-rand-best areapath)
+;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; (if (and (list? srvrs)
+;; ;; (not (null? srvrs)))
+;; ;; (let* ((len (length srvrs))
+;; ;; (idx (random len)))
+;; ;; (list-ref srvrs idx))
+;; ;; #f)))
+;;
+;; (define (server:record->id servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((host port start-time server-id pid)
+;; servr))
+;; (if server-id
+;; server-id
+;; #f))))
+;;
+;; (define (server:record->url servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((host port start-time server-id pid)
+;; servr))
+;; (if (and host port)
+;; (conc host ":" port)
+;; #f))))
+;;
+;;
+;; ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
+;; ;; if it is old enough, overwrite it and wait 0.25 seconds.
+;; ;; if it then has the wrong server key, wait + 1 and call this function recursively.
+;; ;;
+;; #;(define (server:wait-for-server-start-last-flag areapath)
+;; (let* ((start-flag (conc areapath "/logs/server-start-last"))
+;; ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
+;; (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
+;; (server-key (conc (get-host-name) "-" (current-process-id))))
+;; (if (file-exists? start-flag)
+;; (let* ((fmodtime (file-modification-time start-flag))
+;; (delta (- (current-seconds) fmodtime))
+;; (old-enough (> delta idletime))
+;; (new-server-key ""))
+;; ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t.
+;; ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
+;; (if (and old-enough
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Writing " start-flag)
+;; (with-output-to-file start-flag (lambda () (print server-key)))
+;; (thread-sleep! 0.25)
+;; (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
+;; (equal? server-key new-server-key)))
+;; #t
+;; ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively.
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Gating server start, last start: "
+;; (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
+;;
+;; (thread-sleep! ( + 1 idletime))
+;; (server:wait-for-server-start-last-flag areapath)))))))
+;;
+;; ;; oldest server alive determines host then choose random of youngest
+;; ;; five servers on that host
+;; ;;
+;; (define (server:get-servers-info areapath)
+;; ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
+;; (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
+;; (if (not (file-exists? servinfodir))
+;; (create-directory servinfodir))
+;; (let* ((allfiles (glob (conc servinfodir"/*")))
+;; (res (make-hash-table)))
+;; (for-each
+;; (lambda (f)
+;; (let* ((hostport (pathname-strip-directory f))
+;; (serverdat (server:logf-get-start-info f)))
+;; (match serverdat
+;; ((host port start server-id pid)
+;; (if (and host port start server-id pid)
+;; (hash-table-set! res hostport serverdat)
+;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
+;; (else
+;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
+;; allfiles)
+;; res)))
+;;
+;; ;; check the .servinfo directory, are there other servers running on this
+;; ;; or another host?
+;; ;;
+;; ;; returns #t => ok to start another server
+;; ;; #f => not ok to start another server
+;; ;;
+;; (define (server:minimal-check areapath)
+;; (server:clean-up-old areapath)
+;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
+;; (servrs (glob (conc srvdir"/*")))
+;; (thishostip (server:get-best-guess-address (get-host-name)))
+;; (thisservrs (glob (conc srvdir"/"thishostip":*")))
+;; (homehostinf (server:choose-server areapath 'homehost))
+;; (havehome (car homehostinf))
+;; (wearehome (cdr homehostinf)))
+;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
+;; ", numservers: "(length thisservrs))
+;; (cond
+;; ((not havehome) #t) ;; no homehost yet, go for it
+;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
+;; ((and havehome (not wearehome)) #f) ;; we are not the home host
+;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
+;; (else
+;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
+;; #t))))
+;;
+;;
+;; (define server-last-start 0)
+;;
+;;
+;; ;; oldest server alive determines host then choose random of youngest
+;; ;; five servers on that host
+;; ;;
+;; ;; mode:
+;; ;; best - get best server (random of newest five)
+;; ;; home - get home host based on oldest server
+;; ;; info - print info
+;; (define (server:choose-server areapath #!optional (mode 'best))
+;; ;; age is current-starttime
+;; ;; find oldest alive
+;; ;; 1. sort by age ascending and ping until good
+;; ;; find alive rand from youngest
+;; ;; 1. sort by age descending
+;; ;; 2. take five
+;; ;; 3. check alive, discard if not and repeat
+;; ;; first we clean up old server files
+;; (server:clean-up-old areapath)
+;; (let* ((since-last (- (current-seconds) server-last-start))
+;; (server-start-delay 10))
+;; (if ( < (- (current-seconds) server-last-start) 10 )
+;; (begin
+;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
+;; (thread-sleep! server-start-delay)
+;; )
+;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; )
+;; )
+;; (let* ((serversdat (server:get-servers-info areapath))
+;; (servkeys (hash-table-keys serversdat))
+;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
+;; (sort servkeys ;; list of "host:port"
+;; (lambda (a b)
+;; (>= (list-ref (hash-table-ref serversdat a) 2)
+;; (list-ref (hash-table-ref serversdat b) 2))))
+;; '())))
+;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
+;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
+;; (if (not (null? by-time-asc))
+;; (let* ((oldest (last by-time-asc))
+;; (oldest-dat (hash-table-ref serversdat oldest))
+;; (host (list-ref oldest-dat 0))
+;; (all-valid (filter (lambda (x)
+;; (equal? host (list-ref (hash-table-ref serversdat x) 0)))
+;; by-time-asc))
+;; (best-ten (lambda ()
+;; (if (> (length all-valid) 11)
+;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
+;; (if (> (length all-valid) 8)
+;; (drop-right all-valid 1)
+;; all-valid))))
+;; (names->dats (lambda (names)
+;; (map (lambda (x)
+;; (hash-table-ref serversdat x))
+;; names)))
+;; (am-home? (lambda ()
+;; (let* ((currhost (get-host-name))
+;; (bestadrs (server:get-best-guess-address currhost)))
+;; (or (equal? host currhost)
+;; (equal? host bestadrs))))))
+;; (case mode
+;; ((info)
+;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+;; (print "youngest: "(hash-table-ref serversdat (car all-valid))))
+;; ((home) host)
+;; ((homehost) (cons host (am-home?))) ;; shut up old code
+;; ((home?) (am-home?))
+;; ((best-ten)(names->dats (best-ten)))
+;; ((all-valid)(names->dats all-valid))
+;; ((best) (let* ((best-ten (best-ten))
+;; (len (length best-ten)))
+;; (hash-table-ref serversdat (list-ref best-ten (random len)))))
+;; ((count)(length all-valid))
+;; (else
+;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
+;; #f)))
+;; (begin
+;; (server:run areapath)
+;; (set! server-last-start (current-seconds))
+;; ;; (thread-sleep! 3)
+;; (case mode
+;; ((homehost) (cons #f #f))
+;; (else #f))))))
+;;
+;; (define (server:get-servinfo-dir areapath)
+;; (let* ((spath (conc areapath"/.servinfo")))
+;; (if (not (file-exists? spath))
+;; (create-directory spath #t))
+;; spath))
+;;
+;; (define (server:clean-up-old areapath)
+;; ;; any server file that has not been touched in ten minutes is effectively dead
+;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
+;; (for-each
+;; (lambda (sfile)
+;; (let* ((modtime (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
+;; (current-seconds))
+;; (file-modification-time sfile))))
+;; (if (and (number? modtime)
+;; (> (- (current-seconds) modtime)
+;; 600))
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
+;; (delete-file sfile))))))
+;; sfiles)))
+;;
+;; ;; would like to eventually get rid of this
+;; ;;
+;; (define (common:on-homehost?)
+;; (server:choose-server *toppath* 'home?))
+;;
+;; ;; kind start up of server, wait before allowing another server for a given
+;; ;; area to be launched
+;; ;;
+;; (define (server:kind-run areapath)
+;; ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
+;; ;; and wait for it to be at least seconds old
+;; ;; (server:wait-for-server-start-last-flag areapath)
+;; (let loop ()
+;; (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
+;; (begin
+;; (if (common:low-noise-print 30 "our-host-load")
+;; (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
+;; (loop))))
+;; (if (< (server:choose-server areapath 'count) 20)
+;; (server:run areapath))
+;; #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+;; (let* ((lock-file (conc areapath "/logs/server-start.lock")))
+;; (let* ((start-flag (conc areapath "/logs/server-start-last")))
+;; (common:simple-file-lock-and-wait lock-file expire-time: 25)
+;; (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
+;; (system (conc "touch " start-flag)) ;; lazy but safe
+;; (server:run areapath)
+;; (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
+;; (common:simple-file-release-lock lock-file)))
+;; (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
+;;
+;; ;; this one seems to be the general entry point
+;; ;;
+;; (define (server:start-and-wait areapath #!key (timeout 60))
+;; (let ((give-up-time (+ (current-seconds) timeout)))
+;; (let loop ((server-info (server:check-if-running areapath))
+;; (try-num 0))
+;; (if (or server-info
+;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
+;; (server:record->url server-info)
+;; (let* ( (servers (server:choose-server areapath 'all-valid))
+;; (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
+;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again
+;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
+;; (server:run areapath))
+;; (thread-sleep! 5)
+;; (loop (server:check-if-running areapath)
+;; (+ try-num 1)))))))
+;;
+;; (define (server:get-num-servers #!key (numservers 2))
+;; (let ((ns (string->number
+;; (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
+;; (or ns numservers)))
+;;
+;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;; ;;
+;; (define (server:check-if-running areapath) ;; #!key (numservers "2"))
+;; (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
+;; (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
+;; (if (or (and servers
+;; (null? servers))
+;; (not servers))
+;; ;; (and (list? servers)
+;; ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
+;; #f
+;; (let loop ((hed (car servers))
+;; (tal (cdr servers)))
+;; (let ((res (server:check-server hed)))
+;; (if res
+;; hed
+;; (if (null? tal)
+;; #f
+;; (loop (car tal)(cdr tal)))))))))
+;;
+;; ;; ping the given server
+;; ;;
+;; (define (server:check-server server-record)
+;; (let* ((server-url (server:record->url server-record))
+;; (server-id (server:record->id server-record))
+;; (res (server:ping server-url server-id)))
+;; (if res
+;; server-url
+;; #f)))
+;;
+;; (define (server:kill servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((mod-time hostname port start-time server-id pid)
+;; servr))
+;; (tasks:kill-server hostname pid))))
+;;
+;; ;; called in megatest.scm, host-port is string hostname:port
+;; ;;
+;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;; ;; in the same process as the server.
+;; ;;
+;; (define (server:ping host:port server-id #!key (do-exit #f))
+;; (let* ((host-port (cond
+;; ((string? host:port)
+;; (let ((slst (string-split host:port ":")))
+;; (if (eq? (length slst) 2)
+;; (list (car slst)(string->number (cadr slst)))
+;; #f)))
+;; (else
+;; #f))))
+;; (cond
+;; ((and (list? host-port)
+;; (eq? (length host-port) 2))
+;; (let* ((myrunremote (make-remote))
+;; (iface (car host-port))
+;; (port (cadr host-port))
+;; (server-dat (client:connect iface port server-id myrunremote))
+;; (login-res (rmt:login-no-auto-client-setup myrunremote)))
+;; (if (and (list? login-res)
+;; (car login-res))
+;; (begin
+;; ;; (print "LOGIN_OK")
+;; (if do-exit (exit 0))
+;; #t)
+;; (begin
+;; ;; (print "LOGIN_FAILED")
+;; (if do-exit (exit 1))
+;; #f))))
+;; (else
+;; (if host:port
+;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
+;; (if do-exit
+;; (exit 1)
+;; #f)))))
+;;
+;; ;; run ping in separate process, safest way in some cases
+;; ;;
+;; (define (server:ping-server ifaceport)
+;; (with-input-from-pipe
+;; (conc (common:get-megatest-exe) " -ping " ifaceport)
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (res "NOREPLY"))
+;; (if (eof-object? inl)
+;; (case (string->symbol res)
+;; ((NOREPLY) #f)
+;; ((LOGIN_OK) #t)
+;; (else #f))
+;; (loop (read-line) inl))))))
+;;
+;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;; ;;
+;; (define (server:login toppath)
+;; (lambda (toppath)
+;; (set! *db-last-access* (current-seconds)) ;; might not be needed.
+;; (if (equal? *toppath* toppath)
+;; #t
+;; #f)))
+;;
+;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute
+;; ;; This is currently broken. Just use the number of hours with no unit.
+;; ;; Default is 60 seconds.
+;; ;;
+;; (define (server:expiration-timeout)
+;; (let ((tmo (configf:lookup *configdat* "server" "timeout")))
+;; (if (and (string? tmo)
+;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
+;; (* 3600 (string->number tmo))
+;; 600)))
+;;
+;; (define (server:get-best-guess-address hostname)
+;; (let ((res #f))
+;; (for-each
+;; (lambda (adr)
+;; (if (not (eq? (u8vector-ref adr 0) 127))
+;; (set! res adr)))
+;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+;; (string-intersperse
+;; (map number->string
+;; (u8vector->list
+;; (if res res (hostname->ip hostname)))) ".")))
+;;
+;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; ;; (define (server:release-sync-lock)
+;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; ;; (define (server:have-sync-lock?)
+;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;; ;; (have-lock? (car have-lock-pair))
+;; ;; (lock-time (cdr have-lock-pair))
+;; ;; (lock-age (- (current-seconds) lock-time)))
+;; ;; (cond
+;; ;; (have-lock? #t)
+;; ;; ((>lock-age
+;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;; ;; (server:release-sync-lock)
+;; ;; (server:have-sync-lock?))
+;; ;; (else #f))))
+;;
+;; ;; moving this here as it needs access to db and cannot be in common.
+;; ;;
+;;
+;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
+;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
+;; (lambda ()
+;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))
+;; #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
+;; (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
+;; (tmp-area (common:get-db-tmp-area))
+;; (tmp-db (conc tmp-area "/megatest.db"))
+;; (staging-file (conc *toppath* "/.megatest.db"))
+;; (mtdbfile (conc *toppath* "/megatest.db"))
+;; (lockfile (common:get-sync-lock-filepath))
+;; (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
+;; (sync-cmd (if fork-to-background
+;; (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
+;; sync-cmd-core))
+;; (default-min-intersync-delay 2)
+;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
+;; (default-duty-cycle 0.1)
+;; (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
+;; (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
+;; (calculate-off-time (lambda (work-duration duty-cycle)
+;; (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
+;; (off-time min-intersync-delay) ;; adjusted in closure below.
+;; (do-a-sync
+;; (lambda ()
+;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
+;; (let* ((finalres
+;; (let retry-loop ((num-tries 0))
+;; (if (common:simple-file-lock lockfile)
+;; (begin
+;; (cond
+;; ((not (or fork-to-background persist-until-sync))
+;; (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
+;; " , off-time="off-time" seconds ]")
+;; (thread-sleep! (max off-time min-intersync-delay)))
+;; (else
+;; (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
+;;
+;; (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
+;; (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
+;; (delete-file* staging-file)
+;; (let* ((start-time (current-milliseconds))
+;; (res (system sync-cmd))
+;; (dbbackupfile (conc mtdbfile ".backup"))
+;; (res2
+;; (cond
+;; ((eq? 0 res )
+;; (handle-exceptions
+;; exn
+;; #f
+;; (if (file-exists? dbbackupfile)
+;; (delete-file* dbbackupfile)
+;; )
+;; (if (eq? 0 (file-size sync-log))
+;; (delete-file* sync-log))
+;; (system (conc "/bin/mv " staging-file " " mtdbfile))
+;;
+;; (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
+;; (set! off-time (calculate-off-time
+;; last-sync-seconds
+;; (cond
+;; ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
+;; duty-cycle)
+;; (else
+;; (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
+;; default-duty-cycle))))
+;;
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
+;; 'sync-completed))
+;; (else
+;; (system (conc "/bin/cp "sync-log" "sync-log".fail"))
+;; (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
+;; (if (file-exists? (conc mtdbfile ".backup"))
+;; (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
+;; #f))))
+;; (common:simple-file-release-lock lockfile)
+;; (BB> "released lockfile: " lockfile)
+;; (when (common:file-exists? lockfile)
+;; (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
+;; res2) ;; end let
+;; );; end begin
+;; ;; else
+;; (cond
+;; (persist-until-sync
+;; (thread-sleep! 1)
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
+;; (retry-loop (add1 num-tries)))
+;; (else
+;; (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
+;; 'parallel-sync-in-progress))
+;; ) ;; end if got lockfile
+;; )
+;; ))
+;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
+;; finalres)
+;; ) ;; end lambda
+;; ))
+;; do-a-sync))
+;;
+;;
+
+)
Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -22,11 +22,11 @@
(import (prefix sqlite3 sqlite3:))
(declare (unit tasks))
(declare (uses dbfile))
(declare (uses db))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses common))
(declare (uses pgdb))
(import dbfile)
;; (import pgdb) ;; pgdb is a module
Index: tcmt.scm
==================================================================
--- tcmt.scm
+++ tcmt.scm
@@ -27,11 +27,11 @@
(use trace)
;; (trace-call-sites #t)
(declare (uses margs))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses common))
;; (declare (uses megatest-version))
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
Index: tdb.scm
==================================================================
--- tdb.scm
+++ tdb.scm
@@ -29,11 +29,11 @@
(declare (unit tdb))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
-(declare (uses client))
+(declare (uses clientmod))
(declare (uses mt))
(declare (uses db))
(include "common_records.scm")
(include "db_records.scm")
Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -30,11 +30,11 @@
(declare (uses commonmod))
;; (declare (uses dcommon)) ;; needed for the steps processing
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))
-(declare (uses server))
+(declare (uses servermod))
;;(declare (uses stml2))
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
(import (prefix sqlite3 sqlite3:))
(import commonmod)
Index: tree.scm
==================================================================
--- tree.scm
+++ tree.scm
@@ -30,11 +30,11 @@
(declare (uses margs))
(declare (uses launch))
;; (declare (uses megatest-version))
(declare (uses gutils))
(declare (uses db))
-(declare (uses server))
+;; (declare (uses server))
;; (declare (uses synchash))
(declare (uses dcommon))
(include "megatest-version.scm")
(include "common_records.scm")