Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -20,26 +20,30 @@
SHELL=/bin/bash
PREFIX=$(PWD)
CSCOPTS=
INSTALL=install
SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm \
- server.scm configf.scm db.scm keys.scm margs.scm \
- process.scm runs.scm tasks.scm tests.scm genexample.scm \
- http-transport.scm tdb.scm client.scm mt.scm \
- ezsteps.scm lock-queue.scm rmt.scm api.scm \
- subrun.scm portlogger.scm archive.scm env.scm \
- diff-report.scm cgisetup/models/pgdb.scm
+ configf.scm db.scm keys.scm margs.scm process.scm runs.scm \
+ tasks.scm tests.scm genexample.scm tdb.scm mt.scm \
+ ezsteps.scm lock-queue.scm api.scm subrun.scm \
+ portlogger.scm archive.scm env.scm diff-report.scm \
+ cgisetup/models/pgdb.scm
+
+# server.scm http-transport.scm client.scm rmt.scm
# module source files
-MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm
+MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
+ configfmod.scm servermod.scm clientmod.scm rmtmod.scm \
+ artifacts.scm
-all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
+all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut
# dbmod.import.o is just a hack here
-mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
+mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o # dbmod.import.o
db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o
+mofiles/servermod.o : mofiles/artifacts.o
# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \
# mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \
# rmtmod.scm apimod.scm
@@ -258,10 +262,22 @@
$(PREFIX)/bin/mtexec : $(PREFIX)/bin/.$(ARCHSTR)/mtexec utils/mk_wrapper
utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec
chmod a+x $(PREFIX)/bin/mtexec
+# mtserv
+
+mtserv: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtserv.scm
+ csc $(CSCOPTS) $(OFILES) $(MOFILES) mtserv.scm -o mtserv
+
+$(PREFIX)/bin/.$(ARCHSTR)/mtserv : mtserv
+ $(INSTALL) mtserv $(PREFIX)/bin/.$(ARCHSTR)/mtserv
+
+$(PREFIX)/bin/mtserv : $(PREFIX)/bin/.$(ARCHSTR)/mtserv utils/mk_wrapper
+ utils/mk_wrapper $(PREFIX) mtserv $(PREFIX)/bin/mtserv
+ chmod a+x $(PREFIX)/bin/mtserv
+
# tcmt
$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
$(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt
@@ -364,18 +380,18 @@
$(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
$(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
$(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
- $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
+ $(PREFIX)/share/db/mt-pg.sql \
$(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0
# $(PREFIX)/bin/.$(ARCHSTR)/ndboard
-# $(PREFIX)/bin/newdashboard
+# $(PREFIX)/bin/newdashboard $(PREFIX)/bin/tcmt
$(PREFIX)/bin/.$(ARCHSTR) :
mkdir -p $(PREFIX)/bin/.$(ARCHSTR)
mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -21,18 +21,19 @@
;;======================================================================
(use srfi-69 posix)
(declare (unit api))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses tasks))
(import dbmod)
(import dbfile)
+(import rmtmod)
;; allow these queries through without starting a server
;;
(define api:read-only-queries
'(get-key-val-pairs
@@ -394,43 +395,5 @@
#;(common:telemetry-log (conc "api-out:"(->string cmd))
payload: `((params . ,params)
(ok-res . #f)))
(vector #t res))))))))
-;; http-server send-response
-;; api:process-request
-;; db:*
-;;
-;; NB// Runs on the server as part of the server loop
-;;
-(define (api:process-request dbstruct $) ;; the $ is the request vars proc
- (debug:print 4 *default-log-port* "server-id:" *server-id*)
- (let* ((cmd ($ 'cmd))
- (paramsj ($ 'params))
- (key ($ 'key))
- (params (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
- (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key)
- (if (equal? key *server-id*)
- (begin
- (set! *api-process-request-count* (+ *api-process-request-count* 1))
- (let* ((resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
- (success (vector-ref resdat 0))
- (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
- (debug:print 4 *default-log-port* "res:" res)
- (if (not success)
- (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
- (if (> *api-process-request-count* *max-api-process-requests*)
- (set! *max-api-process-requests* *api-process-request-count*))
- (set! *api-process-request-count* (- *api-process-request-count* 1))
- ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
- ;; (rmt:dat->json-str
- ;; (if (or (string? res)
- ;; (list? res)
- ;; (number? res)
- ;; (boolean? res))
- ;; res
- ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
- (db:obj->string res transport: 'http)))
- (begin
- (debug:print 0 *default-log-port* "Server refused to process request. Server id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params)
- (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))
-
Index: artifacts/artifacts.scm
==================================================================
--- artifacts/artifacts.scm
+++ artifacts/artifacts.scm
@@ -96,10 +96,12 @@
;; '((foods (fruit . f)
;; (meat . m)))))
;; => "beef"
;;
+;; NOTE: We call artifacts "arfs"
+
(module artifacts
(
;; cards, util and misc
;; sort-cards
;; calc-sha1
@@ -139,10 +141,11 @@
get-value ;; looks up a value given a key in a dartifact
flatten-all ;; merge the list of values from a query which includes a artifact into a flat alist <== really useful!
check-artifact
;; artifact alists
+get-artifact-fname
write-alist->artifact
read-artifact->alist
;; archive database
;; archive-open-db
@@ -199,22 +202,41 @@
;; new artifacts db
with-todays-adb
get-all-artifacts
refresh-artifacts-db
-
)
-(import (chicken base) scheme (chicken process) (chicken time posix)
+(import scheme)
+
+(cond-expand
+ (chicken-5
+ (import (chicken base)
+ (chicken process) (chicken time posix)
(chicken io) (chicken file) (chicken pathname)
chicken.process-context.posix (chicken string)
- (chicken time) (chicken sort) (chicken file posix) (chicken condition) srfi-1
- regex srfi-13 srfi-69 (chicken port) (chicken process-context)
- crypt sha1 matchable message-digest sqlite3 typed-records
- directory-utils
- scsh-process)
+ (chicken time) (chicken sort) (chicken file posix) (chicken condition)
+ (chicken port) (chicken process-context)
+ ))
+ (chicken-4
+ (import chicken
+ posix
+ data-structures
+ extras
+ ports
+ files
+ setup-api
+ )
+ (define file-executable? file-execute-access?))
+ (else))
+ (import srfi-69 srfi-1
+ regex srfi-13 srfi-69
+ crypt sha1 matchable message-digest sqlite3 typed-records
+ directory-utils
+ scsh-process)
+
;;======================================================================
;; DATA MANIPULATION UTILS
;;======================================================================
(define-inline (unescape-data data)
@@ -1070,15 +1092,18 @@
;;======================================================================
;; Read/write packets to files (convience functions)
;;======================================================================
+(define (get-artifact-fname targdir uuid)
+ (conc targdir "/" uuid ".artifact"))
+
;; write alist to a artifact file
;;
(define (write-alist->artifact targdir dat #!key (artifactspec '())(ptype #f))
(let-values (((uuid artifact)(alist->artifact dat artifactspec ptype: ptype)))
- (with-output-to-file (conc targdir "/" uuid ".artifact")
+ (with-output-to-file (get-artifact-fname targdir uuid)
(lambda ()
(print artifact)))
uuid)) ;; return the uuid
;; read artifact into alist
Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -28,135 +28,136 @@
(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-(module client
-*
-
-)
-
-(import client)
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-;; client:get-signature
-(define (client:get-signature)
- (if *my-client-signature* *my-client-signature*
- (let ((sig (conc (get-host-name) " " (current-process-id))))
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
-;; Not currently used! But, I think it *should* be used!!!
-#;(define (client:logout serverdat)
- (let ((ok (and (socket? serverdat)
- (cdb:logout serverdat *toppath* (client:get-signature)))))
- ok))
-
-;; Do all the connection work, look up the transport type and set up the
-;; connection if required.
-;;
-;; There are two scenarios.
-;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline
-;; 2. We are a run tests, list runs or other interactive process and we must figure out
-;; *transport-type* and *runremote* from the monitor.db
-;;
-;; client:setup
-;;
-;; lookup_server, need to remove *runremote* stuff
-;;
-
-;;(define (http-transport:server-dat-make-url runremote)
-(define (client:get-url runremote)
- (if (and (remote-iface runremote)
- (remote-port runremote))
- (conc "http://"
- (remote-iface runremote)
- ":"
- (remote-port runremote))
- #f))
-
-(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
- (mutex-lock! *rmt-mutex*)
- (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
- (mutex-unlock! *rmt-mutex*)
- res))
-
-(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0))
- (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
- (server:start-and-wait areapath)
- (if (<= remaining-tries 0)
- (begin
- (debug:print-error 0 *default-log-port* "failed to start or connect to server")
- (exit 1))
- ;;
- ;; Alternatively here, we can get the list of candidate servers and work our way
- ;; through them searching for a good one.
- ;;
- (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid
-;; (runremote (or area-dat *runremote*)))
- (if (not server-dat) ;; no server found
- (begin
- (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
- (match server-dat
- ((host port start-time server-id pid)
- (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
- (if (not runremote)
- (begin
- ;; Here we are creating a runremote where there was none or it was clobbered with #f
- ;;
- (set! runremote (make-remote))
- (let* ((server-info (server:check-if-running areapath)))
- (remote-server-info-set! runremote server-info)
- (if server-info
- (begin
- (remote-server-url-set! runremote (server:record->url server-info))
- (remote-server-id-set! runremote (server:record->id server-info)))))))
- ;; at this point we have a runremote
- (if (and host port server-id)
- (let* ((nada (client:connect host port server-id runremote))
- (ping-res (rmt:login-no-auto-client-setup runremote)))
- (if ping-res
- (if runremote
- (begin
- (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote))
- runremote)
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
- (begin ;; login failed but have a server record, clean out the record and try again
- (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
- (http-transport:close-connections runremote)
- (thread-sleep! 1)
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))
- )))
- (begin ;; no server registered
- ;; (server:kind-run areapath)
- (server:start-and-wait areapath)
- (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
- (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
- (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))))
- (else
- (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))
-
-;;
-;; connect - stored in remote-condat
-;;
-;; (define (http-transport:client-connect iface port server-id runremote)
-(define (client:connect iface port server-id runremote-in)
- (let* ((runremote (or runremote-in
- (make-runremote))))
- (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
- (let* ((api-url (conc "http://" iface ":" port "/api"))
- (api-uri (uri-reference (conc "http://" iface ":" port "/api")))
- (api-req (make-request method: 'POST uri: api-uri)))
- ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
- (remote-iface-set! runremote iface)
- (remote-port-set! runremote port)
- (remote-server-id-set! runremote server-id)
- (remote-connect-time-set! runremote (current-seconds))
- (remote-last-access-set! runremote (current-seconds))
- (remote-api-url-set! runremote api-url)
- (remote-api-uri-set! runremote api-uri)
- (remote-api-req-set! runremote api-req)
- runremote)))
-
+;; (module client
+;; *
+;;
+;; )
+;;
+;; (import client)
+;;
+;; (include "common_records.scm")
+;; (include "db_records.scm")
+;;
+;; ;; client:get-signature
+;; (define (client:get-signature)
+;; (if *my-client-signature* *my-client-signature*
+;; (let ((sig (conc (get-host-name) " " (current-process-id))))
+;; (set! *my-client-signature* sig)
+;; *my-client-signature*)))
+;;
+;; ;; Not currently used! But, I think it *should* be used!!!
+;; #;(define (client:logout serverdat)
+;; (let ((ok (and (socket? serverdat)
+;; (cdb:logout serverdat *toppath* (client:get-signature)))))
+;; ok))
+;;
+;; ;; Do all the connection work, look up the transport type and set up the
+;; ;; connection if required.
+;; ;;
+;; ;; There are two scenarios.
+;; ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline
+;; ;; 2. We are a run tests, list runs or other interactive process and we must figure out
+;; ;; *transport-type* and *runremote* from the monitor.db
+;; ;;
+;; ;; client:setup
+;; ;;
+;; ;; lookup_server, need to remove *runremote* stuff
+;; ;;
+;;
+;; ;;(define (http-transport:server-dat-make-url runremote)
+;; (define (client:get-url runremote)
+;; (if (and (remote-iface runremote)
+;; (remote-port runremote))
+;; (conc "http://"
+;; (remote-iface runremote)
+;; ":"
+;; (remote-port runremote))
+;; #f))
+;;
+;; (define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
+;; (mutex-lock! *rmt-mutex*)
+;; (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
+;; (mutex-unlock! *rmt-mutex*)
+;; res))
+;;
+;; (define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0))
+;; (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
+;; (server:start-and-wait areapath)
+;; (if (<= remaining-tries 0)
+;; (begin
+;; (debug:print-error 0 *default-log-port* "failed to start or connect to server")
+;; (exit 1))
+;; ;;
+;; ;; Alternatively here, we can get the list of candidate servers and work our way
+;; ;; through them searching for a good one.
+;; ;;
+;; (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid
+;; ;; (runremote (or area-dat *runremote*)))
+;; (if (not server-dat) ;; no server found
+;; (begin
+;; (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
+;; (match server-dat
+;; ((host port start-time server-id pid)
+;; (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
+;; (if (not runremote)
+;; (begin
+;; ;; Here we are creating a runremote where there was none or it was clobbered with #f
+;; ;;
+;; (set! runremote (make-remote))
+;; (let* ((server-info (server:check-if-running areapath)))
+;; (remote-server-info-set! runremote server-info)
+;; (if server-info
+;; (begin
+;; (remote-server-url-set! runremote (server:record->url server-info))
+;; (remote-server-id-set! runremote (server:record->id server-info)))))))
+;; ;; at this point we have a runremote
+;; (if (and host port server-id)
+;; (let* ((nada (client:connect host port server-id runremote))
+;; (ping-res (rmt:login-no-auto-client-setup runremote)))
+;; (if ping-res
+;; (if runremote
+;; (begin
+;; (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote))
+;; runremote)
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
+;; (begin ;; login failed but have a server record, clean out the record and try again
+;; (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
+;; (http-transport:close-connections runremote)
+;; (thread-sleep! 1)
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))
+;; )))
+;; (begin ;; no server registered
+;; ;; (server:kind-run areapath)
+;; (server:start-and-wait areapath)
+;; (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
+;; (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
+;; (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))))
+;; (else
+;; (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))
+;;
+;; ;;
+;; ;; connect - stored in remote-condat
+;; ;;
+;; ;; (define (http-transport:client-connect iface port server-id runremote)
+;; (define (client:connect iface port server-id runremote-in)
+;; (let* ((runremote (or runremote-in
+;; (make-runremote))))
+;; (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
+;; (let* ((api-url (conc "http://" iface ":" port "/api"))
+;; (api-uri (uri-reference (conc "http://" iface ":" port "/api")))
+;; (api-req (make-request method: 'POST uri: api-uri)))
+;; ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
+;; (remote-iface-set! runremote iface)
+;; (remote-port-set! runremote port)
+;; (remote-server-id-set! runremote server-id)
+;; (remote-connect-time-set! runremote (current-seconds))
+;; (remote-last-access-set! runremote (current-seconds))
+;; (remote-api-url-set! runremote api-url)
+;; (remote-api-uri-set! runremote api-uri)
+;; (remote-api-req-set! runremote api-req)
+;; runremote)))
+;;
+;;
ADDED clientmod.scm
Index: clientmod.scm
==================================================================
--- /dev/null
+++ clientmod.scm
@@ -0,0 +1,80 @@
+
+;; Copyright 2006-2012, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+;; C L I E N T S
+;;======================================================================
+
+;;(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5
+;; message-digest matchable spiffy uri-common intarweb http-client
+;; spiffy-request-vars uri-common intarweb directory-utils)
+
+(declare (unit clientmod))
+(declare (uses servermod))
+(declare (uses artifacts))
+
+(module clientmod
+*
+
+(import scheme
+ posix
+ data-structures
+ srfi-18
+ typed-records
+
+ artifacts
+ servermod
+ )
+
+(defstruct con ;; client connection
+ (hdir #f)
+ (obj-to-str #f)
+ (host #f)
+ (pid #f)
+ (sdat #f) ;; server artifact data
+ )
+
+(define (client:find-server areapath)
+ (let* ((sdir (conc areapath"/.server"))
+ (sarfs (glob (conc sdir"/*.artifact")))) ;; server artifacts
+ (if (null? sarfs)
+ (begin
+ (server:launch areapath)
+ (thread-sleep! 1)
+ (client:find-server areapath))
+ (let* ((sarf (car sarfs))
+ (sdat (read-artifact->alist sarf))
+ (hdir (alist-ref 'd sdat)))
+ (make-con hdir: hdir sdat: sdat)))))
+
+(define (client:send-receive con cmd params)
+ (let* ((obj->string (con-obj-to-str con))
+ (arf `((c . ,cmd)
+ (p . ,(obj->string params))
+ (h . ,(con-host con))
+ (i . ,(con-pid con))))
+ (hdir (con-hdir con))
+ (uuid (write-alist->artifact hdir arf ptype: 'Q)))
+
+ ;; wait for a response here
+
+ #f
+ ))
+
+)
+
Index: dashboard-context-menu.scm
==================================================================
--- dashboard-context-menu.scm
+++ dashboard-context-menu.scm
@@ -34,11 +34,11 @@
(declare (unit dashboard-context-menu))
(declare (uses common))
(declare (uses db))
(declare (uses gutils))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))
Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -33,11 +33,11 @@
(declare (unit dashboard-tests))
(declare (uses common))
(declare (uses db))
(declare (uses gutils))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -51,11 +51,11 @@
(declare (uses dbmod))
;; (declare (uses debugprint))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
-(declare (uses client))
+;; (declare (uses client))
(declare (uses mt))
(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
Index: diff-report.scm
==================================================================
--- diff-report.scm
+++ diff-report.scm
@@ -16,11 +16,11 @@
;; along with Megatest. If not, see .
;;
(declare (unit diff-report))
(declare (uses common))
-(declare (uses rmt))
+(declare (uses rmtmod))
(include "common_records.scm")
(use matchable)
(use fmt)
(use ducttape-lib)
Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -1,6 +1,5 @@
-
;; Copyright 2006-2012, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
@@ -14,680 +13,720 @@
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
-(require-extension (srfi 18) extras tcp s11n)
-
-
-(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras)
-
-(use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
-
-;; Configurations for server
-(tcp-buffer-size 2048)
-(max-connections 2048)
-
+;; (require-extension (srfi 18) extras tcp s11n)
+;;
+;;
+;; (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras)
+;;
+;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
+;;
+;; ;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048)
+;;
(declare (unit http-transport))
-
-(declare (uses common))
-(declare (uses db))
-(declare (uses tests))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-(declare (uses server))
-;; (declare (uses daemon))
-(declare (uses portlogger))
-(declare (uses rmt))
-(declare (uses dbfile))
-(declare (uses commonmod))
-
-(include "common_records.scm")
-(include "db_records.scm")
-(include "js-path.scm")
-
-(import dbfile commonmod)
-
-(require-library stml)
-(define (http-transport:make-server-url hostport)
- (if (not hostport)
- #f
- (conc "http://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-
-;;======================================================================
-;; S E R V E R
-;; ======================================================================
-
-;; Call this to start the actual server
-;;
-
-(define *db:process-queue-mutex* (make-mutex))
-
-(define (http-transport:run hostn)
- ;; Configurations for server
- (tcp-buffer-size 2048)
- (max-connections 2048)
- (debug:print 2 *default-log-port* "Attempting to start the server ...")
- (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily
- (hostname (get-host-name))
- (ipaddrstr (let ((ipstr (if (string=? "-" hostn)
- ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
- (server:get-best-guess-address hostname)
- #f)))
- (if ipstr ipstr hostn))) ;; hostname)))
- (start-port (portlogger:open-run-close portlogger:find-port))
- (link-tree-path (common:get-linktree))
- (tmp-area (common:get-db-tmp-area))
- (start-file (conc tmp-area "/.server-start")))
- (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
- ;; set some parameters for the server
- (root-path (if link-tree-path
- link-tree-path
- (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
- (handle-directory spiffy-directory-listing)
- (handle-exception (lambda (exn chain)
- (signal (make-composite-condition
- (make-property-condition
- 'server
- 'message "server error")))))
-
- ;; http-transport:handle-directory) ;; simple-directory-handler)
- ;; Setup the web server and a /ctrl interface
- ;;
- (vhost-map `(((* any) . ,(lambda (continue)
- ;; open the db on the first call
- ;; This is were we set up the database connections
- (let* (($ (request-vars source: 'both))
- (dat ($ 'dat))
- (res #f))
- (cond
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "api"))
- (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc
- headers: '((content-type text/plain)))
- (mutex-lock! *heartbeat-mutex*)
- (set! *db-last-access* (current-seconds))
- (mutex-unlock! *heartbeat-mutex*))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ ""))
- (send-response body: (http-transport:main-page)))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "json_api"))
- (send-response body: (http-transport:main-page)))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "runs"))
- (send-response body: (http-transport:main-page)))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ any))
- (send-response body: "hey there!\n"
- headers: '((content-type text/plain))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "hey"))
- (send-response body: "hey there!\n"
- headers: '((content-type text/plain))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "jquery3.1.0.js"))
- (send-response body: (http-transport:show-jquery)
- headers: '((content-type application/javascript))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "test_log"))
- (send-response body: (http-transport:html-test-log $)
- headers: '((content-type text/HTML))))
- ((equal? (uri-path (request-uri (current-request)))
- '(/ "dashboard"))
- (send-response body: (http-transport:html-dboard $)
- headers: '((content-type text/HTML))))
- (else (continue))))))))
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn)
- (with-output-to-file start-file (lambda ()(print (current-process-id)))))
- (http-transport:try-start-server ipaddrstr start-port)))
-
-;; This is recursively run by http-transport:run until sucessful
-;;
-(define (http-transport:try-start-server ipaddrstr portnum)
- (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
- (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
- (if (not config-use-proxy)
- (determine-proxy (constantly #f)))
- (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
- (handle-exceptions
- exn
- (begin
- ;; (print-error-message exn)
- (if (< portnum 64000)
- (begin
- (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
- (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
- (debug:print 5 *default-log-port* "exn=" (condition->list exn))
- (portlogger:open-run-close portlogger:set-failed portnum)
- (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
- (thread-sleep! 0.1)
-
- ;; get_next_port goes here
- (http-transport:try-start-server ipaddrstr
- (portlogger:open-run-close portlogger:find-port)))
- (begin
- (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server"))))
- ;; any error in following steps will result in a retry
- (set! *server-info* (list ipaddrstr portnum))
- (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
- ;; This starts the spiffy server
- ;; NEED WAY TO SET IP TO #f TO BIND ALL
- ;; (start-server bind-address: ipaddrstr port: portnum)
- (if config-hostname ;; this is a hint to bind directly
- (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-")
- ;; ipaddrstr
- ;; config-hostname))
- (start-server port: portnum))
- (portlogger:open-run-close portlogger:set-port portnum "released")
- (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
-
-;;======================================================================
-;; S E R V E R U T I L I T I E S
-;;======================================================================
-
-;;======================================================================
-;; C L I E N T S
-;;======================================================================
-
-(define *http-mutex* (make-mutex))
-
-;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
-;; I'm pretty sure it is defunct.
-
-;; This next block all imported en-mass from the api branch
-(define *http-requests-in-progress* 0)
-(define *http-connections-next-cleanup* (current-seconds))
-
-(define (http-transport:get-time-to-cleanup)
- (let ((res #f))
- (mutex-lock! *http-mutex*)
- (set! res (> (current-seconds) *http-connections-next-cleanup*))
- (mutex-unlock! *http-mutex*)
- res))
-
-(define (http-transport:inc-requests-count)
- (mutex-lock! *http-mutex*)
- (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))
- ;; Use this opportunity to slow things down iff there are too many requests in flight
- (if (> *http-requests-in-progress* 5)
- (begin
- (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
- (thread-sleep! 1)))
- (mutex-unlock! *http-mutex*))
-
-(define (http-transport:dec-requests-count proc)
- (mutex-lock! *http-mutex*)
- (proc)
- (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
- (mutex-unlock! *http-mutex*))
-
-(define (http-transport:dec-requests-count-and-close-all-connections)
- (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
- (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
- (if (> *http-requests-in-progress* 0)
- (if (> etime (current-seconds))
- (begin
- (thread-sleep! 0.05)
- (loop etime))
- (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
- (close-all-connections!)))
- (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
- (mutex-unlock! *http-mutex*))
-
-(define (http-transport:inc-requests-and-prep-to-close-all-connections)
- (mutex-lock! *http-mutex*)
- (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))
-
-;; Send "cmd" with json payload "params" to serverdat and receive result
-;;
-(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3))
- (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat)
- (let* ((fullurl (remote-api-req runremote))
- (res (vector #f "uninitialized"))
- (success #t)
- (sparams (db:obj->string params transport: 'http))
- (server-id (remote-server-id runremote)))
- (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds))
-
- ;; set up the http-client here
- (max-retry-attempts 1)
- ;; consider all requests indempotent
- (retry-request? (lambda (request)
- #f))
- ;; send the data and get the response
- ;; extract the needed info from the http data and
- ;; process and return it.
- (let* ((send-recieve (lambda ()
- (mutex-lock! *http-mutex*)
- ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
- ;; ((exn http client-error) e (print e)))
- (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
- success
- (db:string->obj
- (handle-exceptions
- exn
- (let ((call-chain (get-call-chain))
- (msg ((condition-property-accessor 'exn 'message) exn)))
- (set! success #f)
- (if (debug:debug-mode 1)
- (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
- (begin
- (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
- (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
- (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
- (debug:print 0 *default-log-port* " call-chain: " call-chain)))
- ;; what if another thread is communicating ok? Can't happen due to mutex
- (http-transport:close-connections runremote)
- (mutex-unlock! *http-mutex*)
- ;; (close-connection! fullurl)
- (db:obj->string #f))
- (with-input-from-request ;; was dat
- fullurl
- (list (cons 'key (or server-id "thekey"))
- (cons 'cmd cmd)
- (cons 'params sparams))
- read-string))
- transport: 'http)
- 0)) ;; added this speculatively
- ;; Shouldn't this be a call to the managed call-all-connections stuff above?
- ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections?
- (mutex-unlock! *http-mutex*)
- ))
- (time-out (lambda ()
- (thread-sleep! 45)
- (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
- #f))
- (th1 (make-thread send-recieve "with-input-from-request"))
- (th2 (make-thread time-out "time out")))
- (thread-start! th1)
- (thread-start! th2)
- (thread-join! th1)
- (vector-set! res 0 success)
- (thread-terminate! th2)
- (if (vector? res)
- (if (vector-ref res 0) ;; this is the first flag or the second flag?
- (let* ((res-dat (vector-ref res 1)))
- (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
- (signal (make-composite-condition
- (make-property-condition
- 'servermismatch
- 'message (vector-ref res 1))))
- res)) ;; this is the *inner* vector? seriously? why?
- (if (debug:debug-mode 11)
- (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
- (print-call-chain (current-error-port))
- (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
- (debug:print 11 *default-log-port* " server call chain:")
- (pp (vector-ref res 1) (current-error-port))
- (signal (vector-ref res 0)))
- res))
- (signal (make-composite-condition
- (make-property-condition
- 'timeout
- 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))
-
-;; careful closing of connections stored in *runremote*
-;;
-(define (http-transport:close-connections runremote)
- (if (remote? runremote)
- (let ((api-dat (remote-api-uri runremote)))
- (handle-exceptions
- exn
- (begin
- (print-call-chain *default-log-port*)
- (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
- (if (args:any-defined? "-server" "-execute" "-run")
- (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
- (if api-dat (close-connection! api-dat))
- (remote-conndat-set! runremote #f)
- #t))
- #f))
-
-;; run http-transport:keep-running in a parallel thread to monitor that the db is being
-;; used and to shutdown after sometime if it is not.
-;;
-(define (http-transport:keep-running)
- ;; if none running or if > 20 seconds since
- ;; server last used then start shutdown
- ;; This thread waits for the server to come alive
- (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
- (let* ((servinfofile #f)
- (sdat #f)
- (no-sync-db (db:open-no-sync-db))
- (tmp-area (common:get-db-tmp-area))
- (started-file (conc tmp-area "/.server-started"))
- (server-start-time (current-seconds))
- (server-info (let loop ((start-time (current-seconds))
- (changed #t)
- (last-sdat "not this"))
- (begin ;; let ((sdat #f))
- (thread-sleep! 0.01)
- (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
- (mutex-lock! *heartbeat-mutex*)
- (set! sdat *server-info*)
- (mutex-unlock! *heartbeat-mutex*)
- (if (and sdat
- (not changed)
- (> (- (current-seconds) start-time) 2))
- (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo"))
- (ipaddr (car sdat))
- (port (cadr sdat))
- (servinf (conc servinfodir"/"ipaddr":"port)))
- (set! servinfofile servinf)
- (if (not (file-exists? servinfodir))
- (create-directory servinfodir #t))
- (with-output-to-file servinf
- (lambda ()
- (let* ((serv-id (server:mk-signature)))
- (set! *server-id* serv-id)
- (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))
- (print "started: "(seconds->year-week/day-time (current-seconds))))))
- (set! *on-exit-procs* (cons
- (lambda ()
- (delete-file* servinf))
- *on-exit-procs*))
- ;; put data about this server into a simple flat file host.port
- (debug:print-info 0 *default-log-port* "Received server alive signature")
- sdat)
- (begin
- (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
- (sleep 4)
- (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
- (if sdat
- (let* ((ipaddr (car sdat))
- (port (cadr sdat))
- (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port)))
- (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
- (exit))
- (exit)
- )
- (loop start-time
- (equal? sdat last-sdat)
- sdat)))))))
- (iface (car server-info))
- (port (cadr server-info))
- (last-access 0)
- (server-timeout (server:expiration-timeout))
- (server-going #f)
- (server-log-file (args:get-arg "-log"))) ;; always set when we are a server
-
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
- (with-output-to-file started-file (lambda ()(print (current-process-id)))))
-
- (let loop ((count 0)
- (server-state 'available)
- (bad-sync-count 0)
- (start-time (current-milliseconds)))
-
- ;; Use this opportunity to sync the tmp db to megatest.db
- (if (not server-going) ;; *dbstruct-dbs*
- (begin
- (debug:print 0 *default-log-port* "SERVER: dbprep")
- (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!!
- (set! server-going #t)
- (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
- (if (and no-sync-db
- (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
- (begin
- (if (common:low-noise-print 120 "sync-all-print")
- (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")))
- (db:all-db-sync *dbstruct-dbs*)
- )))
-
- ;; when things go wrong we don't want to be doing the various queries too often
- ;; so we strive to run this stuff only every four seconds or so.
- (let* ((sync-time (- (current-milliseconds) start-time))
- (rem-time (quotient (- 4000 sync-time) 1000)))
- (if (and (<= rem-time 4)
- (> rem-time 0))
- (thread-sleep! rem-time)))
-
- (if (< count 1) ;; 3x3 = 9 secs aprox
- (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
-
- ;; Check that iface and port have not changed (can happen if server port collides)
- (mutex-lock! *heartbeat-mutex*)
- (set! sdat *server-info*)
- (mutex-unlock! *heartbeat-mutex*)
-
- (if (not (equal? sdat (list iface port)))
- (let ((new-iface (car sdat))
- (new-port (cadr sdat)))
- (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
- (set! iface new-iface)
- (set! port new-port)
- (if (not *server-id*)
- (set! *server-id* (server:mk-signature)))
- (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
- (flush-output *default-log-port*)))
-
- ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
- (mutex-lock! *heartbeat-mutex*)
- (set! last-access *db-last-access*)
- (mutex-unlock! *heartbeat-mutex*)
-
- (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
- (begin
- (if (not *server-id*)
- (set! *server-id* (server:mk-signature)))
- (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
- (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
- (flush-output *default-log-port*)))
- (if (common:low-noise-print 60 "dbstats")
- (begin
- (debug:print 0 *default-log-port* "Server stats:")
- (db:print-current-query-stats)))
- (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
- (cond
- ((and *server-run*
- (> (+ last-access server-timeout)
- (current-seconds)))
- (if (common:low-noise-print 120 "server continuing")
- (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
- (let ((curr-time (current-seconds)))
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
- (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
- (not *server-overloaded*)
- (file-exists? servinfofile))
- (change-file-times servinfofile curr-time curr-time)))
- (if (and (common:low-noise-print 120 "start new server")
- (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
- (begin
- (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...")
- (server:kind-run *toppath*)
- (if (> *api-process-request-count* 100)
- (begin
- (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile)
- (delete-file* servinfofile)))))))
- (loop 0 server-state bad-sync-count (current-milliseconds)))
- (else
- (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
- (http-transport:server-shutdown port)))))))
-
-(define (http-transport:server-shutdown port)
- (begin
- ;;(BB> "http-transport:server-shutdown called")
- (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
- ;;
- ;; start_shutdown
- ;;
- (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
- (portlogger:open-run-close portlogger:set-port port "released")
- (thread-sleep! 1)
-
- ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
- ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*)
- ;; (debug:print-info 0 *default-log-port* "Average cached write time "
- ;; (if (eq? *number-of-writes* 0)
- ;; "n/a (no writes)"
- ;; (/ *writes-total-delay*
- ;; *number-of-writes*))
- ;; " ms")
- ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*)
- ;; (debug:print-info 0 *default-log-port* "Average non-cached time "
- ;; (if (eq? *number-non-write-queries* 0)
- ;; "n/a (no queries)"
- ;; (/ *total-non-write-delay*
- ;; *number-non-write-queries*))
- ;; " ms")
-
- (db:print-current-query-stats)
- #;(common:save-pkt `((action . exit)
- (T . server)
- (pid . ,(current-process-id)))
- *configdat* #t)
-
- ;; remove .servinfo file(s) here
-
- (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
- (exit)))
-
-;; all routes though here end in exit ...
-;;
-;; start_server?
-;;
-(define (http-transport:launch)
- ;; check the .servinfo directory, are there other servers running on this
- ;; or another host?
- (let* ((server-start-is-ok (server:minimal-check *toppath*)))
- (if (not server-start-is-ok)
- (begin
- (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.")
- (exit 1))))
-
- ;; check that a server start is in progress, pause or exit if so
- (let* ((th2 (make-thread (lambda ()
- (debug:print-info 0 *default-log-port* "Server run thread started")
- (http-transport:run
- (if (args:get-arg "-server")
- (args:get-arg "-server")
- "-")
- )) "Server run"))
- (th3 (make-thread (lambda ()
- (debug:print-info 0 *default-log-port* "Server monitor thread started")
- (http-transport:keep-running)
- "Keep running"))))
- (thread-start! th2)
- (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
- (thread-start! th3)
- (set! *didsomething* #t)
- (thread-join! th2)
- (exit)))
-
-;; (define (http-transport:server-signal-handler signum)
-;; (signal-mask! signum)
-;; (handle-exceptions
-;; exn
-;; (debug:print 0 *default-log-port* " ... exiting ...")
-;; (let ((th1 (make-thread (lambda ()
-;; (thread-sleep! 1))
-;; "eat response"))
-;; (th2 (make-thread (lambda ()
-;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
-;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
-;; (debug:print 0 *default-log-port* " Done.")
-;; (exit 4))
-;; "exit on ^C timer")))
-;; (thread-start! th2)
-;; (thread-start! th1)
-;; (thread-join! th2))))
-
-;;===============================================
-;; Java script
-;;===============================================
-(define (http-transport:show-jquery)
- (let* ((data (tests:readlines *java-script-lib*)))
-(string-join data "\n")))
-
-
-
-;;======================================================================
-;; web pages
-;;======================================================================
-
-(define (http-transport:html-test-log $)
- (let* ((run-id ($ 'runid))
- (test-item ($ 'testname))
- (parts (string-split test-item ":"))
- (test-name (car parts))
-
- (item-name (if (equal? (length parts) 1)
- ""
- (cadr parts))))
- ;(print $)
-(tests:get-test-log run-id test-name item-name)))
-
-
-(define (http-transport:html-dboard $)
- (let* ((page ($ 'page))
- (oup (open-output-string))
- (bdy "--------------------------")
-
- (ret (tests:dynamic-dboard page)))
- (s:output-new oup ret)
- (close-output-port oup)
-
- (set! bdy (get-output-string oup))
- (conc "
Dashboard
" bdy "
" )))
-
-(define (http-transport:main-page)
- (let ((linkpath (root-path)))
- (conc "" (pathname-strip-directory *toppath*) "
"
- ""
- "Run area: " *toppath*
- "Server Stats
"
- (http-transport:stats-table)
- "
"
- (http-transport:runs linkpath)
- "
"
- ;; (http-transport:run-stats)
- ""
- )))
-
-(define (http-transport:stats-table)
- (mutex-lock! *heartbeat-mutex*)
- (let ((res
- (conc ""
- ;; "Max cached queries | " *max-cache-size* " |
"
- "Number of cached writes | " *number-of-writes* " |
"
- "Average cached write time | " (if (eq? *number-of-writes* 0)
- "n/a (no writes)"
- (/ *writes-total-delay*
- *number-of-writes*))
- " ms |
"
- "Number non-cached queries | " *number-non-write-queries* " |
"
- ;; "Average non-cached time | " (if (eq? *number-non-write-queries* 0)
- ;; "n/a (no queries)"
- ;; (/ *total-non-write-delay*
- ;; *number-non-write-queries*))
- " ms |
"
- "Last access | " (seconds->time-string *db-last-access*) " |
"
- "
")))
- (mutex-unlock! *heartbeat-mutex*)
- res))
-
-(define (http-transport:runs linkpath)
- (conc "Runs
"
- (string-intersperse
- (let ((files (map pathname-strip-directory (glob (conc linkpath "/*")))))
- (map (lambda (p)
- (conc "" p "
"))
- files))
- " ")))
-
-#;(define (http-transport:run-stats)
- (let ((stats (open-run-close db:get-running-stats #f)))
- (conc ""
- (string-intersperse
- (map (lambda (stat)
- (conc "" (car stat) " | " (cadr stat) " |
"))
- stats)
- " ")
- "
")))
+;;
+;; (declare (uses common))
+;; (declare (uses db))
+;; (declare (uses tests))
+;; (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
+;; (declare (uses server))
+;; ;; (declare (uses daemon))
+;; (declare (uses portlogger))
+;; (declare (uses rmt))
+;; (declare (uses dbfile))
+;; (declare (uses commonmod))
+;;
+;; (include "common_records.scm")
+;; (include "db_records.scm")
+;; (include "js-path.scm")
+;;
+;; (import dbfile commonmod)
+;;
+;; (require-library stml)
+;; (define (http-transport:make-server-url hostport)
+;; (if (not hostport)
+;; #f
+;; (conc "http://" (car hostport) ":" (cadr hostport))))
+;;
+;; (define *server-loop-heart-beat* (current-seconds))
+;;
+;; ;;======================================================================
+;; ;; S E R V E R
+;; ;; ======================================================================
+;;
+;; ;; Call this to start the actual server
+;; ;;
+;;
+;; (define *db:process-queue-mutex* (make-mutex))
+;;
+;; (define (http-transport:run hostn)
+;; ;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048)
+;; (debug:print 2 *default-log-port* "Attempting to start the server ...")
+;; (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily
+;; (hostname (get-host-name))
+;; (ipaddrstr (let ((ipstr (if (string=? "-" hostn)
+;; ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
+;; (server:get-best-guess-address hostname)
+;; #f)))
+;; (if ipstr ipstr hostn))) ;; hostname)))
+;; (start-port (portlogger:open-run-close portlogger:find-port))
+;; (link-tree-path (common:get-linktree))
+;; (tmp-area (common:get-db-tmp-area))
+;; (start-file (conc tmp-area "/.server-start")))
+;; (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
+;; ;; set some parameters for the server
+;; (root-path (if link-tree-path
+;; link-tree-path
+;; (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
+;; (handle-directory spiffy-directory-listing)
+;; (handle-exception (lambda (exn chain)
+;; (signal (make-composite-condition
+;; (make-property-condition
+;; 'server
+;; 'message "server error")))))
+;;
+;; ;; http-transport:handle-directory) ;; simple-directory-handler)
+;; ;; Setup the web server and a /ctrl interface
+;; ;;
+;; (vhost-map `(((* any) . ,(lambda (continue)
+;; ;; open the db on the first call
+;; ;; This is were we set up the database connections
+;; (let* (($ (request-vars source: 'both))
+;; (dat ($ 'dat))
+;; (res #f))
+;; (cond
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "api"))
+;; (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc
+;; headers: '((content-type text/plain)))
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! *db-last-access* (current-seconds))
+;; (mutex-unlock! *heartbeat-mutex*))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ ""))
+;; (send-response body: (http-transport:main-page)))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "json_api"))
+;; (send-response body: (http-transport:main-page)))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "runs"))
+;; (send-response body: (http-transport:main-page)))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ any))
+;; (send-response body: "hey there!\n"
+;; headers: '((content-type text/plain))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "hey"))
+;; (send-response body: "hey there!\n"
+;; headers: '((content-type text/plain))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "jquery3.1.0.js"))
+;; (send-response body: (http-transport:show-jquery)
+;; headers: '((content-type application/javascript))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "test_log"))
+;; (send-response body: (http-transport:html-test-log $)
+;; headers: '((content-type text/HTML))))
+;; ((equal? (uri-path (request-uri (current-request)))
+;; '(/ "dashboard"))
+;; (send-response body: (http-transport:html-dboard $)
+;; headers: '((content-type text/HTML))))
+;; (else (continue))))))))
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn)
+;; (with-output-to-file start-file (lambda ()(print (current-process-id)))))
+;; (http-transport:try-start-server ipaddrstr start-port)))
+;;
+;; ;; This is recursively run by http-transport:run until sucessful
+;; ;;
+;; (define (http-transport:try-start-server ipaddrstr portnum)
+;; (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
+;; (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
+;; (if (not config-use-proxy)
+;; (determine-proxy (constantly #f)))
+;; (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; ;; (print-error-message exn)
+;; (if (< portnum 64000)
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
+;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+;; (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+;; (portlogger:open-run-close portlogger:set-failed portnum)
+;; (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
+;; (thread-sleep! 0.1)
+;;
+;; ;; get_next_port goes here
+;; (http-transport:try-start-server ipaddrstr
+;; (portlogger:open-run-close portlogger:find-port)))
+;; (begin
+;; (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server"))))
+;; ;; any error in following steps will result in a retry
+;; (set! *server-info* (list ipaddrstr portnum))
+;; (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
+;; ;; This starts the spiffy server
+;; ;; NEED WAY TO SET IP TO #f TO BIND ALL
+;; ;; (start-server bind-address: ipaddrstr port: portnum)
+;; (if config-hostname ;; this is a hint to bind directly
+;; (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-")
+;; ;; ipaddrstr
+;; ;; config-hostname))
+;; (start-server port: portnum))
+;; (portlogger:open-run-close portlogger:set-port portnum "released")
+;; (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
+;;
+;; ;;======================================================================
+;; ;; S E R V E R U T I L I T I E S
+;; ;;======================================================================
+;;
+;; ;;======================================================================
+;; ;; C L I E N T S
+;; ;;======================================================================
+;;
+;; (define *http-mutex* (make-mutex))
+;;
+;; ;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
+;; ;; I'm pretty sure it is defunct.
+;;
+;; ;; This next block all imported en-mass from the api branch
+;; (define *http-requests-in-progress* 0)
+;; (define *http-connections-next-cleanup* (current-seconds))
+;;
+;; (define (http-transport:get-time-to-cleanup)
+;; (let ((res #f))
+;; (mutex-lock! *http-mutex*)
+;; (set! res (> (current-seconds) *http-connections-next-cleanup*))
+;; (mutex-unlock! *http-mutex*)
+;; res))
+;;
+;; (define (http-transport:inc-requests-count)
+;; (mutex-lock! *http-mutex*)
+;; (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))
+;; ;; Use this opportunity to slow things down iff there are too many requests in flight
+;; (if (> *http-requests-in-progress* 5)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
+;; (thread-sleep! 1)))
+;; (mutex-unlock! *http-mutex*))
+;;
+;; (define (http-transport:dec-requests-count proc)
+;; (mutex-lock! *http-mutex*)
+;; (proc)
+;; (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
+;; (mutex-unlock! *http-mutex*))
+;;
+;; (define (http-transport:dec-requests-count-and-close-all-connections)
+;; (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
+;; (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
+;; (if (> *http-requests-in-progress* 0)
+;; (if (> etime (current-seconds))
+;; (begin
+;; (thread-sleep! 0.05)
+;; (loop etime))
+;; (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
+;; (close-all-connections!)))
+;; (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
+;; (mutex-unlock! *http-mutex*))
+;;
+;; (define (http-transport:inc-requests-and-prep-to-close-all-connections)
+;; (mutex-lock! *http-mutex*)
+;; (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))
+;;
+;; ;; Send "cmd" with json payload "params" to serverdat and receive result
+;; ;;
+;; (define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3))
+;; (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat)
+;; (let* ((fullurl (remote-api-req runremote))
+;; (res (vector #f "uninitialized"))
+;; (success #t)
+;; (sparams (db:obj->string params transport: 'http))
+;; (server-id (remote-server-id runremote)))
+;; (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds))
+;;
+;; ;; set up the http-client here
+;; (max-retry-attempts 1)
+;; ;; consider all requests indempotent
+;; (retry-request? (lambda (request)
+;; #f))
+;; ;; send the data and get the response
+;; ;; extract the needed info from the http data and
+;; ;; process and return it.
+;; (let* ((send-recieve (lambda ()
+;; (mutex-lock! *http-mutex*)
+;; ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
+;; ;; ((exn http client-error) e (print e)))
+;; (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
+;; success
+;; (db:string->obj
+;; (handle-exceptions
+;; exn
+;; (let ((call-chain (get-call-chain))
+;; (msg ((condition-property-accessor 'exn 'message) exn)))
+;; (set! success #f)
+;; (if (debug:debug-mode 1)
+;; (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
+;; (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
+;; (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
+;; (debug:print 0 *default-log-port* " call-chain: " call-chain)))
+;; ;; what if another thread is communicating ok? Can't happen due to mutex
+;; (http-transport:close-connections runremote)
+;; (mutex-unlock! *http-mutex*)
+;; ;; (close-connection! fullurl)
+;; (db:obj->string #f))
+;; (with-input-from-request ;; was dat
+;; fullurl
+;; (list (cons 'key (or server-id "thekey"))
+;; (cons 'cmd cmd)
+;; (cons 'params sparams))
+;; read-string))
+;; transport: 'http)
+;; 0)) ;; added this speculatively
+;; ;; Shouldn't this be a call to the managed call-all-connections stuff above?
+;; ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections?
+;; (mutex-unlock! *http-mutex*)
+;; ))
+;; (time-out (lambda ()
+;; (thread-sleep! 45)
+;; (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
+;; #f))
+;; (th1 (make-thread send-recieve "with-input-from-request"))
+;; (th2 (make-thread time-out "time out")))
+;; (thread-start! th1)
+;; (thread-start! th2)
+;; (thread-join! th1)
+;; (vector-set! res 0 success)
+;; (thread-terminate! th2)
+;; (if (vector? res)
+;; (if (vector-ref res 0) ;; this is the first flag or the second flag?
+;; (let* ((res-dat (vector-ref res 1)))
+;; (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
+;; (signal (make-composite-condition
+;; (make-property-condition
+;; 'servermismatch
+;; 'message (vector-ref res 1))))
+;; res)) ;; this is the *inner* vector? seriously? why?
+;; (if (debug:debug-mode 11)
+;; (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
+;; (print-call-chain (current-error-port))
+;; (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
+;; (debug:print 11 *default-log-port* " server call chain:")
+;; (pp (vector-ref res 1) (current-error-port))
+;; (signal (vector-ref res 0)))
+;; res))
+;; (signal (make-composite-condition
+;; (make-property-condition
+;; 'timeout
+;; 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))
+;;
+;; ;; careful closing of connections stored in *runremote*
+;; ;;
+;; (define (http-transport:close-connections runremote)
+;; (if (remote? runremote)
+;; (let ((api-dat (remote-api-uri runremote)))
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (print-call-chain *default-log-port*)
+;; (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
+;; (if (args:any-defined? "-server" "-execute" "-run")
+;; (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
+;; (if api-dat (close-connection! api-dat))
+;; (remote-conndat-set! runremote #f)
+;; #t))
+;; #f))
+;;
+;; ;; run http-transport:keep-running in a parallel thread to monitor that the db is being
+;; ;; used and to shutdown after sometime if it is not.
+;; ;;
+;; (define (http-transport:keep-running)
+;; ;; if none running or if > 20 seconds since
+;; ;; server last used then start shutdown
+;; ;; This thread waits for the server to come alive
+;; (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+;; (let* ((servinfofile #f)
+;; (sdat #f)
+;; (no-sync-db (db:open-no-sync-db))
+;; (tmp-area (common:get-db-tmp-area))
+;; (started-file (conc tmp-area "/.server-started"))
+;; (server-start-time (current-seconds))
+;; (server-info (let loop ((start-time (current-seconds))
+;; (changed #t)
+;; (last-sdat "not this"))
+;; (begin ;; let ((sdat #f))
+;; (thread-sleep! 0.01)
+;; (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! sdat *server-info*)
+;; (mutex-unlock! *heartbeat-mutex*)
+;; (if (and sdat
+;; (not changed)
+;; (> (- (current-seconds) start-time) 2))
+;; (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo"))
+;; (ipaddr (car sdat))
+;; (port (cadr sdat))
+;; (servinf (conc servinfodir"/"ipaddr":"port)))
+;; (set! servinfofile servinf)
+;; (if (not (file-exists? servinfodir))
+;; (create-directory servinfodir #t))
+;; (with-output-to-file servinf
+;; (lambda ()
+;; (let* ((serv-id (server:mk-signature)))
+;; (set! *server-id* serv-id)
+;; (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))
+;; (print "started: "(seconds->year-week/day-time (current-seconds))))))
+;; (set! *on-exit-procs* (cons
+;; (lambda ()
+;; (delete-file* servinf))
+;; *on-exit-procs*))
+;; ;; put data about this server into a simple flat file host.port
+;; (debug:print-info 0 *default-log-port* "Received server alive signature")
+;; sdat)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+;; (sleep 4)
+;; (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+;; (if sdat
+;; (let* ((ipaddr (car sdat))
+;; (port (cadr sdat))
+;; (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port)))
+;; (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+;; (exit))
+;; (exit)
+;; )
+;; (loop start-time
+;; (equal? sdat last-sdat)
+;; sdat)))))))
+;; (iface (car server-info))
+;; (port (cadr server-info))
+;; (last-access 0)
+;; (server-timeout (server:expiration-timeout))
+;; (server-going #f)
+;; (server-log-file (args:get-arg "-log"))) ;; always set when we are a server
+;;
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
+;; (with-output-to-file started-file (lambda ()(print (current-process-id)))))
+;;
+;; (let loop ((count 0)
+;; (server-state 'available)
+;; (bad-sync-count 0)
+;; (start-time (current-milliseconds)))
+;;
+;; ;; Use this opportunity to sync the tmp db to megatest.db
+;; (if (not server-going) ;; *dbstruct-dbs*
+;; (begin
+;; (debug:print 0 *default-log-port* "SERVER: dbprep")
+;; (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!!
+;; (set! server-going #t)
+;; (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
+;; (if (and no-sync-db
+;; (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
+;; (begin
+;; (if (common:low-noise-print 120 "sync-all-print")
+;; (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")))
+;; (db:all-db-sync *dbstruct-dbs*)
+;; )))
+;;
+;; ;; when things go wrong we don't want to be doing the various queries too often
+;; ;; so we strive to run this stuff only every four seconds or so.
+;; (let* ((sync-time (- (current-milliseconds) start-time))
+;; (rem-time (quotient (- 4000 sync-time) 1000)))
+;; (if (and (<= rem-time 4)
+;; (> rem-time 0))
+;; (thread-sleep! rem-time)))
+;;
+;; (if (< count 1) ;; 3x3 = 9 secs aprox
+;; (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
+;;
+;; ;; Check that iface and port have not changed (can happen if server port collides)
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! sdat *server-info*)
+;; (mutex-unlock! *heartbeat-mutex*)
+;;
+;; (if (not (equal? sdat (list iface port)))
+;; (let ((new-iface (car sdat))
+;; (new-port (cadr sdat)))
+;; (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+;; (set! iface new-iface)
+;; (set! port new-port)
+;; (if (not *server-id*)
+;; (set! *server-id* (server:mk-signature)))
+;; (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
+;; (flush-output *default-log-port*)))
+;;
+;; ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! last-access *db-last-access*)
+;; (mutex-unlock! *heartbeat-mutex*)
+;;
+;; (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
+;; (begin
+;; (if (not *server-id*)
+;; (set! *server-id* (server:mk-signature)))
+;; (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
+;; (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
+;; (flush-output *default-log-port*)))
+;; (if (common:low-noise-print 60 "dbstats")
+;; (begin
+;; (debug:print 0 *default-log-port* "Server stats:")
+;; (db:print-current-query-stats)))
+;; (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
+;; (cond
+;; ((and *server-run*
+;; (> (+ last-access server-timeout)
+;; (current-seconds)))
+;; (if (common:low-noise-print 120 "server continuing")
+;; (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
+;; (let ((curr-time (current-seconds)))
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
+;; (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
+;; (not *server-overloaded*)
+;; (file-exists? servinfofile))
+;; (change-file-times servinfofile curr-time curr-time)))
+;; (if (and (common:low-noise-print 120 "start new server")
+;; (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...")
+;; (server:kind-run *toppath*)
+;; (if (> *api-process-request-count* 100)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile)
+;; (delete-file* servinfofile)))))))
+;; (loop 0 server-state bad-sync-count (current-milliseconds)))
+;; (else
+;; (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+;; (http-transport:server-shutdown port)))))))
+;;
+;; (define (http-transport:server-shutdown port)
+;; (begin
+;; ;;(BB> "http-transport:server-shutdown called")
+;; (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+;; ;;
+;; ;; start_shutdown
+;; ;;
+;; (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
+;; (portlogger:open-run-close portlogger:set-port port "released")
+;; (thread-sleep! 1)
+;;
+;; ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
+;; ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*)
+;; ;; (debug:print-info 0 *default-log-port* "Average cached write time "
+;; ;; (if (eq? *number-of-writes* 0)
+;; ;; "n/a (no writes)"
+;; ;; (/ *writes-total-delay*
+;; ;; *number-of-writes*))
+;; ;; " ms")
+;; ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*)
+;; ;; (debug:print-info 0 *default-log-port* "Average non-cached time "
+;; ;; (if (eq? *number-non-write-queries* 0)
+;; ;; "n/a (no queries)"
+;; ;; (/ *total-non-write-delay*
+;; ;; *number-non-write-queries*))
+;; ;; " ms")
+;;
+;; (db:print-current-query-stats)
+;; #;(common:save-pkt `((action . exit)
+;; (T . server)
+;; (pid . ,(current-process-id)))
+;; *configdat* #t)
+;;
+;; ;; remove .servinfo file(s) here
+;;
+;; (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
+;; (exit)))
+;;
+;; ;; all routes though here end in exit ...
+;; ;;
+;; ;; start_server?
+;; ;;
+;; (define (http-transport:launch)
+;; ;; check the .servinfo directory, are there other servers running on this
+;; ;; or another host?
+;; (let* ((server-start-is-ok (server:minimal-check *toppath*)))
+;; (if (not server-start-is-ok)
+;; (begin
+;; (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.")
+;; (exit 1))))
+;;
+;; ;; check that a server start is in progress, pause or exit if so
+;; (let* ((th2 (make-thread (lambda ()
+;; (debug:print-info 0 *default-log-port* "Server run thread started")
+;; (http-transport:run
+;; (if (args:get-arg "-server")
+;; (args:get-arg "-server")
+;; "-")
+;; )) "Server run"))
+;; (th3 (make-thread (lambda ()
+;; (debug:print-info 0 *default-log-port* "Server monitor thread started")
+;; (http-transport:keep-running)
+;; "Keep running"))))
+;; (thread-start! th2)
+;; (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
+;; (thread-start! th3)
+;; (set! *didsomething* #t)
+;; (thread-join! th2)
+;; (exit)))
+;;
+;; ;; (define (http-transport:server-signal-handler signum)
+;; ;; (signal-mask! signum)
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (debug:print 0 *default-log-port* " ... exiting ...")
+;; ;; (let ((th1 (make-thread (lambda ()
+;; ;; (thread-sleep! 1))
+;; ;; "eat response"))
+;; ;; (th2 (make-thread (lambda ()
+;; ;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
+;; ;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
+;; ;; (debug:print 0 *default-log-port* " Done.")
+;; ;; (exit 4))
+;; ;; "exit on ^C timer")))
+;; ;; (thread-start! th2)
+;; ;; (thread-start! th1)
+;; ;; (thread-join! th2))))
+;;
+;; ;;===============================================
+;; ;; Java script
+;; ;;===============================================
+;; (define (http-transport:show-jquery)
+;; (let* ((data (tests:readlines *java-script-lib*)))
+;; (string-join data "\n")))
+;;
+;;
+;;
+;; ;;======================================================================
+;; ;; web pages
+;; ;;======================================================================
+;;
+;; (define (http-transport:html-test-log $)
+;; (let* ((run-id ($ 'runid))
+;; (test-item ($ 'testname))
+;; (parts (string-split test-item ":"))
+;; (test-name (car parts))
+;;
+;; (item-name (if (equal? (length parts) 1)
+;; ""
+;; (cadr parts))))
+;; ;(print $)
+;; (tests:get-test-log run-id test-name item-name)))
+;;
+;;
+;; (define (http-transport:html-dboard $)
+;; (let* ((page ($ 'page))
+;; (oup (open-output-string))
+;; (bdy "--------------------------")
+;;
+;; (ret (tests:dynamic-dboard page)))
+;; (s:output-new oup ret)
+;; (close-output-port oup)
+;;
+;; (set! bdy (get-output-string oup))
+;; (conc "Dashboard
" bdy "
" )))
+;;
+;; (define (http-transport:main-page)
+;; (let ((linkpath (root-path)))
+;; (conc "" (pathname-strip-directory *toppath*) "
"
+;; ""
+;; "Run area: " *toppath*
+;; "Server Stats
"
+;; (http-transport:stats-table)
+;; "
"
+;; (http-transport:runs linkpath)
+;; "
"
+;; ;; (http-transport:run-stats)
+;; ""
+;; )))
+;;
+;; (define (http-transport:stats-table)
+;; (mutex-lock! *heartbeat-mutex*)
+;; (let ((res
+;; (conc ""
+;; ;; "Max cached queries | " *max-cache-size* " |
"
+;; "Number of cached writes | " *number-of-writes* " |
"
+;; "Average cached write time | " (if (eq? *number-of-writes* 0)
+;; "n/a (no writes)"
+;; (/ *writes-total-delay*
+;; *number-of-writes*))
+;; " ms |
"
+;; "Number non-cached queries | " *number-non-write-queries* " |
"
+;; ;; "Average non-cached time | " (if (eq? *number-non-write-queries* 0)
+;; ;; "n/a (no queries)"
+;; ;; (/ *total-non-write-delay*
+;; ;; *number-non-write-queries*))
+;; " ms |
"
+;; "Last access | " (seconds->time-string *db-last-access*) " |
"
+;; "
")))
+;; (mutex-unlock! *heartbeat-mutex*)
+;; res))
+;;
+;; (define (http-transport:runs linkpath)
+;; (conc "Runs
"
+;; (string-intersperse
+;; (let ((files (map pathname-strip-directory (glob (conc linkpath "/*")))))
+;; (map (lambda (p)
+;; (conc "" p "
"))
+;; files))
+;; " ")))
+;;
+;; #;(define (http-transport:run-stats)
+;; (let ((stats (open-run-close db:get-running-stats #f)))
+;; (conc ""
+;; (string-intersperse
+;; (map (lambda (stat)
+;; (conc "" (car stat) " | " (cadr stat) " |
"))
+;; stats)
+;; " ")
+;; "
")))
+;;
+;; ;; http-server send-response
+;; ;; api:process-request
+;; ;; db:*
+;; ;;
+;; ;; NB// Runs on the server as part of the server loop
+;; ;;
+;; (define (api:process-request dbstruct $) ;; the $ is the request vars proc
+;; (debug:print 4 *default-log-port* "server-id:" *server-id*)
+;; (let* ((cmd ($ 'cmd))
+;; (paramsj ($ 'params))
+;; (key ($ 'key))
+;; (params (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
+;; (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key)
+;; (if (equal? key *server-id*)
+;; (begin
+;; (set! *api-process-request-count* (+ *api-process-request-count* 1))
+;; (let* ((resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
+;; (success (vector-ref resdat 0))
+;; (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
+;; (debug:print 4 *default-log-port* "res:" res)
+;; (if (not success)
+;; (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
+;; (if (> *api-process-request-count* *max-api-process-requests*)
+;; (set! *max-api-process-requests* *api-process-request-count*))
+;; (set! *api-process-request-count* (- *api-process-request-count* 1))
+;; ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
+;; ;; (rmt:dat->json-str
+;; ;; (if (or (string? res)
+;; ;; (list? res)
+;; ;; (number? res)
+;; ;; (boolean? res))
+;; ;; res
+;; ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
+;; (db:obj->string res transport: 'http)))
+;; (begin
+;; (debug:print 0 *default-log-port* "Server refused to process request. Server id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params)
+;; (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))
+;;
+;;
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -25,12 +25,10 @@
(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses margs))
(declare (uses runs))
(declare (uses launch))
-(declare (uses server))
-(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))
(declare (uses db))
@@ -43,10 +41,13 @@
(declare (uses env))
(declare (uses diff-report))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbmod.import))
+(declare (uses rmtmod))
+(declare (uses clientmod))
+(declare (uses servermod))
(declare (uses commonmod))
(declare (uses commonmod.import))
(declare (uses dbfile))
(declare (uses dbfile.import))
;; (declare (uses debugprint))
@@ -57,11 +58,12 @@
;; (declare (uses ftail))
;; (import ftail)
(import dbmod
commonmod
- dbfile)
+ dbfile
+ servermod)
(define *db* #f) ;; this is only for the repl, do not use in general!!!!
(include "common_records.scm")
(include "key_records.scm")
@@ -921,13 +923,29 @@
;;======================================================================
;; Server? Start up here.
;;
(if (args:get-arg "-server")
- (let ((tl (launch:setup)))
+ (let* ((tl (launch:setup))
+ (srvdat (server:setup tl))
+ (handler (lambda (dbstruct cmd params)
+ (api:execute-requests dbstruct (if (string? cmd)
+ (string->symbol cmd)
+ cmd)
+ (db:string->obj params)))))
+ (server:set-handler srvdat handler)
+ (srv-obj-to-str-set! srvdat db:obj->string)
+ (srv-str-to-obj-set! srvdat db:string->obj)
+ (srv-dbstruct-set! srvdat (db:setup #t))
+ (thread-join!
+ (thread-start! (make-thread
+ (lambda ()
+ (server:run srvdat)))))
+
;; (server:launch 0 'http)
- (http-transport:launch)
+ ;; (http-transport:launch) ;; NOTE: Need to replace this call
+
(set! *didsomething* #t)))
;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
Index: mt.scm
==================================================================
--- mt.scm
+++ mt.scm
@@ -24,13 +24,13 @@
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
-(declare (uses server))
+(declare (uses servermod))
(declare (uses runs))
-(declare (uses rmt))
+(declare (uses rmtmod))
;; (declare (uses filedb))
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
ADDED mtserv.scm
Index: mtserv.scm
==================================================================
--- /dev/null
+++ mtserv.scm
@@ -0,0 +1,117 @@
+; Copyright 2006-2017, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+;;
+
+;; (include "common.scm")
+;; (include "megatest-version.scm")
+
+;; fake out readline usage of toplevel-command
+(define (toplevel-command . a) #f)
+
+(use srfi-1 posix srfi-69 readline ;; regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras)
+ srfi-19 srfi-18 extras format regex regex-case
+ (prefix dbi dbi:)
+ matchable
+ )
+
+;; (declare (uses common))
+(declare (uses margs))
+(declare (uses configfmod))
+(declare (uses servermod))
+
+(include "megatest-version.scm")
+(include "megatest-fossil-hash.scm")
+
+(define help (conc "
+mtserv, part of the Megatest tool suite, documentation at http://www.kiatoa.com/fossils/megatest
+ version " megatest-version "
+ license GPL, Copyright Matt Welland 2006-2017
+
+Usage: mtserv action [options]
+ -h : this help
+ -manual : show the Megatest user manual
+ -version : print megatest version (currently " megatest-version ")
+ -start-dir path : switch to dir at start
+
+actions:
+
+ server : start server
+ repl : start repl
+
+Examples:
+
+Called as " (string-intersperse (argv) " ") "
+Version " megatest-version ", built from " megatest-fossil-hash ))
+ ;; first token is our action, but only if no leading dash
+
+(define *action* (if (and (> (length (argv)) 1)
+ (not (string-match "^\\-.*" (cadr (argv)))))
+ (cadr (argv))
+ #f))
+
+(define *remargs*
+ (args:get-args
+ (if *action* (cdr (argv)) (argv))
+ '("-log")
+ '("-h"
+ )
+ args:arg-hash
+ 0))
+
+(if (args:get-arg "-h")
+ (begin
+ (print help)
+ (exit)))
+
+(if (args:get-arg "-start-dir")
+ (let* ((start-dir (args:get-arg "-start-dir")))
+ (if (and (file-exists? start-dir)
+ (directory? start-dir))
+ (change-directory start-dir)
+ (begin
+ (print "FATAL: cannot find or access "start-dir)
+ (exit 1)))))
+
+(if *action*
+ (case (string->symbol *action*)
+ ((server) (server:run))
+ ((repl)
+ (import extras) ;; might not be needed
+ ;; (import csi)
+ (import readline)
+ (import apropos)
+ ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ...
+
+ (install-history-file (get-environment-variable "HOME") ".mtserv_history") ;; [homedir] [filename] [nlines])
+ (current-input-port (make-readline-port "mtserv> "))
+ (print "Starting repl...")
+ (repl))
+ ;; (if (args:get-arg "-load")
+ ;; (load (args:get-arg "-load"))
+ ;; (repl)))
+ (else
+ (print "Action \""*action*"\" not recognised.")
+ (print help)))
+ (begin
+ (print "No action provided.")
+ (print help)))
+
+#|
+(define mtconf (car (simple-setup #f)))
+(define dat (common:with-queue-db mtconf (lambda (conn)(get-pkts conn '()))))
+(pp (pkts#flatten-all dat '((cmd . ((parent . P)(url . M)))(runtype . ((parent . P)))) 'id 'group-id 'uuid 'parent 'pkt-type 'pkt 'processed))
+|#
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -22,1056 +22,1060 @@
(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
(declare (uses dbfile))
+
(include "common_records.scm")
;; (declare (uses rmtmod))
(import dbfile) ;; rmtmod)
-;;
-;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
-;;
-
-;; generate entries for ~/.megatestrc with the following
-;;
-;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
-
-;;======================================================================
-;; S U P P O R T F U N C T I O N S
-;;======================================================================
-
-;; if a server is either running or in the process of starting call client:setup
-;; else return #f to let the calling proc know that there is no server available
-;;
-(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
- (let* ((cinfo (if (remote? runremote)
- (remote-conndat runremote)
- #f)))
- (if cinfo
- cinfo
- (if (server:check-if-running areapath)
- (client:setup areapath runremote)
- #f))))
-
-(define (rmt:on-homehost? runremote)
- (let* ((hh-dat (remote-hh-dat runremote)))
- (if (pair? hh-dat)
- (cdr hh-dat)
- (begin
- (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
- #f))))
-
-
-;;======================================================================
-
-(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
-
-;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
-;;
-(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
-
- #;(common:telemetry-log (conc "rmt:"(->string cmd))
- payload: `((rid . ,rid)
- (params . ,params)))
-
- (if (> attemptnum 2)
- (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
-
- (cond
- ((> attemptnum 2) (thread-sleep! 0.05))
- ((> attemptnum 10) (thread-sleep! 0.5))
- ((> attemptnum 20) (thread-sleep! 1)))
- (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))
- (begin (server:run *toppath*) (thread-sleep! 3)))
-
-
- ;;DOT digraph megatest_state_status {
- ;;DOT ranksep=0;
- ;;DOT // rankdir=LR;
- ;;DOT node [shape="box"];
- ;;DOT "rmt:send-receive" -> MUTEXLOCK;
- ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
- ;; do all the prep locked under the rmt-mutex
- (mutex-lock! *rmt-mutex*)
-
- ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
- ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
- ;; 3. do the query, if on homehost use local access
- ;;
- (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
- (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
- (runremote (or area-dat
- *runremote*))
- (attemptnum (+ 1 attemptnum))
- (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
-
- ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
- ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
- ;; ensure we have a record for our connection for given area
- (if (not runremote) ;; can remove this one. should never get here.
- (begin
- (set! *runremote* (make-remote))
- (let* ((server-info (remote-server-info *runremote*)))
- (if server-info
- (begin
- (remote-server-url-set! *runremote* (server:record->url server-info))
- (remote-server-id-set! *runremote* (server:record->id server-info)))))
- (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
-
- ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
- ;; DOT SET_HOMEHOST -> MUTEXLOCK;
- ;; ensure we have a homehost record
- (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
- (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
- (let ((hh-data (server:choose-server areapath 'homehost)))
- (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
-
- ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
- (cond
- #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
- (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
- (set! *runremote* #f)
- ;; BUG: close-connections should go here?
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
-
- ;;DOT EXIT;
- ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
- ;; give up if more than 150 attempts
- ((> attemptnum 150)
- (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
- (exit 1))
-
- ;;DOT CASE2 [label="local\nreadonly\nquery"];
- ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
- ;;DOT CASE2 -> "rmt:open-qry-close-locally";
- ;; readonly mode, read request- handle it - case 2
- ((and readonly-mode
- (member cmd api:read-only-queries))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
- (rmt:open-qry-close-locally cmd 0 params)
- )
-
- ;;DOT CASE3 [label="write in\nread-only mode"];
- ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
- ;;DOT CASE3 -> "#f";
- ;; readonly mode, write request. Do nothing, return #f
- (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
-
- ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
- ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
- ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
- ;;
- ;;DOT CASE4 [label="reset\nconnection"];
- ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
- ;;DOT CASE4 -> "rmt:send-receive";
- ;; reset the connection if it has been unused too long
- ((and runremote
- (remote-conndat runremote)
- (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
- (+ (remote-last-access runremote)
- (remote-server-timeout runremote))))
- (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses in " (remote-server-timeout runremote) " seconds, forcing new connection.")
- (http-transport:close-connections runremote)
- ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
- ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE5 [label="local\nread"];
- ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
- ;;DOT CASE5 -> "rmt:open-qry-close-locally";
-
- ;; on homehost and this is a read
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (rmt:on-homehost? runremote)
- (member cmd api:read-only-queries)) ;; this is a read
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE6 [label="init\nremote"];
- ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
- ;;DOT CASE6 -> "rmt:send-receive";
- ;; on homehost and this is a write, we already have a server, but server has died
-
- ;; reinstate this keep-alive section but inject a time condition into the (add ...
- ;;
- ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost
- ;; (not (member cmd api:read-only-queries)) ;; this is a write
- ;; (remote-server-url runremote) ;; have a server
- ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
- ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
- ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
- ;; (set! *runremote* (make-remote))
- ;; (let* ((server-info (remote-server-info *runremote*)))
- ;; (if server-info
- ;; (begin
- ;; (remote-server-url-set! *runremote* (server:record->url server-info))
- ;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
- ;; (remote-force-server-set! runremote (common:force-server?))
- ;; (mutex-unlock! *rmt-mutex*)
- ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
- ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE7 [label="homehost\nwrite"];
- ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
- ;;DOT CASE7 -> "rmt:open-qry-close-locally";
- ;; on homehost and this is a write, we already have a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; on homehost
- (not (member cmd api:read-only-queries)) ;; this is a write
- (remote-server-url runremote)) ;; have a server
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE8 [label="force\nserver"];
- ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
- ;;DOT CASE8 -> "rmt:open-qry-close-locally";
- ;; on homehost, no server contact made and this is a write, passively start a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; have homehost
- (not (remote-server-url runremote)) ;; no connection yet
- (not (member cmd api:read-only-queries))) ;; not a read-only query
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
- (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
- (if server-info
- (begin
- (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
- (remote-server-id-set! runremote (server:record->id server-info)))
- (if (common:force-server?)
- (server:start-and-wait *toppath*)
- (server:kind-run *toppath*)))
- (remote-force-server-set! runremote (common:force-server?))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
- (rmt:open-qry-close-locally cmd 0 params)))
-
- ;;DOT CASE9 [label="force server\nnot on homehost"];
- ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
- ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
- ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
- (not (remote-conndat runremote)))
- (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
- (not (remote-conndat runremote)))) ;; and no connection
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
- (mutex-unlock! *rmt-mutex*)
- (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
- (server:start-and-wait *toppath*))
- (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
- (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
-
- ;;DOT CASE10 [label="on homehost"];
- ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
- ;;DOT CASE10 -> "rmt:open-qry-close-locally";
- ;; all set up if get this far, dispatch the query
- ((and (not (remote-force-server runremote))
- (cdr (remote-hh-dat runremote))) ;; we are on homehost
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
- (rmt:open-qry-close-locally cmd (if rid rid 0) params))
-
- ;;DOT CASE11 [label="send_receive"];
- ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
- ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
- ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
- ;; not on homehost, do server query
- (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
- ;;DOT }
-
-;; bunch of small functions factored out of send-receive to make debug easier
-;;
-
-(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
- ;; (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
- ;; (mutex-lock! *rmt-mutex*)
- (let* ((conninfo (remote-conndat runremote))
- (dat-in (condition-case ;; handling here has
- ;; caused a lot of
- ;; problems. However it
- ;; is needed to deal with
- ;; attemtped
- ;; communication to
- ;; servers that have gone
- ;; away
- (http-transport:client-api-send-receive 0 runremote cmd params)
- ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
- ((servermismatch) (vector #f "Server id mismatch" ))
- ((commfail)(vector #f "communications fail"))
- ((exn)(vector #f "other fail" (print-call-chain)))))
- (dat (if (and (vector? dat-in) ;; ... check it is a correct size
- (> (vector-length dat-in) 1))
- dat-in
- (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
- (success (if (vector? dat) (vector-ref dat 0) #f))
- (res (if (vector? dat) (vector-ref dat 1) #f)))
- (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
- (remote-last-access-set! runremote (current-seconds)) ;; refresh access time
- (begin
- (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
- (set! conninfo #f)
- (http-transport:close-connections runremote)))
- (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
- (mutex-unlock! *rmt-mutex*)
- (if success ;; success only tells us that the transport was
- ;; successful, have to examine the data to see if
- ;; there was a detected issue at the other end
- (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
- (begin
- (debug:print-error 0 *default-log-port* " dat=" dat)
- (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
- )))
-
-(define (rmt:print-db-stats)
- (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
- (debug:print 18 *default-log-port* "DB Stats\n========")
- (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
- (for-each (lambda (cmd)
- (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
- (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
- (sort (hash-table-keys *db-stats*)
- (lambda (a b)
- (> (vector-ref (hash-table-ref *db-stats* a) 0)
- (vector-ref (hash-table-ref *db-stats* b) 0)))))))
-
-(define (rmt:get-max-query-average run-id)
- (mutex-lock! *db-stats-mutex*)
- (let* ((runkey (conc "run-id=" run-id " "))
- (cmds (filter (lambda (x)
- (substring-index runkey x))
- (hash-table-keys *db-stats*)))
- (res (if (null? cmds)
- (cons 'none 0)
- (let loop ((cmd (car cmds))
- (tal (cdr cmds))
- (max-cmd (car cmds))
- (res 0))
- (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
- (tot (vector-ref cmd-dat 0))
- (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
- (currmax (max res curravg))
- (newmax-cmd (if (> curravg res) cmd max-cmd)))
- (if (null? tal)
- (if (> tot 10)
- (cons newmax-cmd currmax)
- (cons 'none 0))
- (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
- (mutex-unlock! *db-stats-mutex*)
- res))
-
-(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
- (let* ((qry-is-write (not (member cmd api:read-only-queries)))
- (db-file-path (db:dbfile-path)) ;; 0))
- (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
- (read-only (not (file-write-access? db-file-path)))
- (start (current-milliseconds))
- (resdat (if (not (and read-only qry-is-write))
- (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
- ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
- ;; exn ;; This is an attempt to detect that situation and recover gracefully
- ;; (begin
- ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
- ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
- (if (and (vector? v)
- (> (vector-length v) 1))
- (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
- newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
- (vector #t '()))) ;; ) ;; we could also check that the returned types are valid
- (vector #t '())))
- (success (vector-ref resdat 0))
- (res (vector-ref resdat 1))
- (duration (- (current-milliseconds) start)))
- (if (and read-only qry-is-write)
- (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
- (if (not success)
- (if (> remretries 0)
- (begin
- (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
- (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
- (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
- (begin
- (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
- #f))
- (begin
- ;; (rmt:update-db-stats run-id cmd params duration)
- ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
- (if qry-is-write
- (let ((start-time (current-seconds)))
- (mutex-lock! *db-multi-sync-mutex*)
-/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
- (mutex-unlock! *db-multi-sync-mutex*)))))
- res))
-
-(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
- (let* ((run-id (if run-id run-id 0))
- (res (http-transport:client-api-send-receive run-id runremote cmd params)))
- (if (and res (vector-ref res 0))
- (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
- #f)))
-
-;;======================================================================
-;;
-;; A C T U A L A P I C A L L S
-;;
-;;======================================================================
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-(define (rmt:kill-server run-id)
- (rmt:send-receive 'kill-server run-id (list run-id)))
-
-(define (rmt:start-server run-id)
- (rmt:send-receive 'start-server 0 (list run-id)))
-
-;;======================================================================
-;; M I S C
-;;======================================================================
-
-(define (rmt:login run-id)
- (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))
-
-;; This login does no retries under the hood - it acts a bit like a ping.
-;; Deprecated for nmsg-transport.
-;;
-(define (rmt:login-no-auto-client-setup runremote)
- (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
-
-;; hand off a call to one of the db:queries statements
-;; added run-id to make looking up the correct db possible
-;;
-(define (rmt:general-call stmtname run-id . params)
- (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
-
-
-;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
-(define (rmt:get-latest-host-load hostname)
- (rmt:send-receive 'get-latest-host-load 0 (list hostname)))
-
-(define (rmt:sdb-qry qry val run-id)
- ;; add caching if qry is 'getid or 'getstr
- (rmt:send-receive 'sdb-qry run-id (list qry val)))
-
-;; NOT COMPLETED
-(define (rmt:runtests user run-id testpatt params)
- (rmt:send-receive 'runtests run-id testpatt))
-
-(define (rmt:get-run-record-ids target run keynames test-patt)
- (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))
-
-(define (rmt:get-changed-record-ids since-time)
- (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )
-
-(define (rmt:drop-all-triggers)
- (rmt:send-receive 'drop-all-triggers #f '()))
-
-(define (rmt:create-all-triggers)
- (rmt:send-receive 'create-all-triggers #f '()))
-
-;;======================================================================
-;; T E S T M E T A
-;;======================================================================
-
-(define (rmt:get-tests-tags)
- (rmt:send-receive 'get-tests-tags #f '()))
-
-;;======================================================================
-;; K E Y S
-;;======================================================================
-
-;; These require run-id because the values come from the run!
-;;
-(define (rmt:get-key-val-pairs run-id)
- (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
-
-(define (rmt:get-keys)
- (if *db-keys* *db-keys*
- (let ((res (rmt:send-receive 'get-keys #f '())))
- (set! *db-keys* res)
- res)))
-
-(define (rmt:get-keys-write) ;; dummy query to force server start
- (let ((res (rmt:send-receive 'get-keys-write #f '())))
- (set! *db-keys* res)
- res))
-
-;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
-;; to cache the resuls in a hash
-;;
-(define (rmt:get-key-vals run-id)
- (or (hash-table-ref/default *keyvals* run-id #f)
- (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
- (hash-table-set! *keyvals* run-id res)
- res)))
-
-(define (rmt:get-targets)
- (rmt:send-receive 'get-targets #f '()))
-
-(define (rmt:get-target run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-target run-id (list run-id)))
-
-(define (rmt:get-run-times runpatt targetpatt)
- (rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
-
-
-;;======================================================================
-;; T E S T S
-;;======================================================================
-
-;; Just some syntatic sugar
-(define (rmt:register-test run-id test-name item-path)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:general-call 'register-test run-id run-id test-name item-path))
-
-(define (rmt:get-test-id run-id testname item-path)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
-
-;; run-id is NOT used
-;;
-(define (rmt:get-test-info-by-id run-id test-id)
- (if (number? test-id)
- (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
- (begin
- (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
- (print-call-chain (current-error-port))
- #f)))
-
-(define (rmt:test-get-rundir-from-test-id run-id test-id)
- (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
-
-(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (let* ((test-path (if (string? work-area)
- work-area
- (rmt:test-get-rundir-from-test-id run-id test-id))))
- (debug:print 3 *default-log-port* "TEST PATH: " test-path)
- (open-test-db test-path)))
-
-;; WARNING: This currently bypasses the transaction wrapped writes system
-(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
-
-(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
-
-(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
- (assert (number? run-id) "FATAL: Run id required.")
- ;; (if (number? run-id)
- (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
- ;; (begin
- ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
- ;; (print-call-chain (current-error-port))
- ;; '())))
-
-(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
-
-;; get stuff via synchash
-(define (rmt:synchash-get run-id proc synckey keynum params)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
-
-(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
-
-;; IDEA: Threadify these - they spend a lot of time waiting ...
-;;
-(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
- (let ((multi-run-mutex (make-mutex))
- (run-id-list (if run-ids
- run-ids
- (rmt:get-all-run-ids)))
- (result '()))
- (if (null? run-id-list)
- '()
- (let loop ((hed (car run-id-list))
- (tal (cdr run-id-list))
- (threads '()))
- (if (> (length threads) 5)
- (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
- (let* ((newthread (make-thread
- (lambda ()
- (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
- (if (list? res)
- (begin
- (mutex-lock! multi-run-mutex)
- (set! result (append result res))
- (mutex-unlock! multi-run-mutex))
- (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
- (conc "multi-run-thread for run-id " hed)))
- (newthreads (cons newthread threads)))
- (thread-start! newthread)
- (thread-sleep! 0.05) ;; give that thread some time to start
- (if (null? tal)
- newthreads
- (loop (car tal)(cdr tal) newthreads))))))
- result))
-
+;; ;;
+;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
+;; ;;
+;;
+;; ;; generate entries for ~/.megatestrc with the following
+;; ;;
+;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
+;;
+;; ;;======================================================================
+;; ;; S U P P O R T F U N C T I O N S
+;; ;;======================================================================
+;;
+;; ;; if a server is either running or in the process of starting call client:setup
+;; ;; else return #f to let the calling proc know that there is no server available
+;; ;;
+;; (define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
+;; (let* ((cinfo (if (remote? runremote)
+;; (remote-conndat runremote)
+;; #f)))
+;; (if cinfo
+;; cinfo
+;; (if (server:check-if-running areapath)
+;; (client:setup areapath runremote)
+;; #f))))
+;;
+;; (define (rmt:on-homehost? runremote)
+;; (let* ((hh-dat (remote-hh-dat runremote)))
+;; (if (pair? hh-dat)
+;; (cdr hh-dat)
+;; (begin
+;; (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
+;; #f))))
+;;
+;;
+;; ;;======================================================================
+;;
+;; (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
+;;
+;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
+;;
+;; ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
+;; ;;
+;; ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
+;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd))
+;; ;; payload: `((rid . ,rid)
+;; ;; (params . ,params)))
+;; ;;
+;; ;; (if (> attemptnum 2)
+;; ;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
+;; ;;
+;; ;; (cond
+;; ;; ((> attemptnum 2) (thread-sleep! 0.05))
+;; ;; ((> attemptnum 10) (thread-sleep! 0.5))
+;; ;; ((> attemptnum 20) (thread-sleep! 1)))
+;; ;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))
+;; ;; (begin (server:run *toppath*) (thread-sleep! 3)))
+;; ;;
+;; ;;
+;; ;; ;;DOT digraph megatest_state_status {
+;; ;; ;;DOT ranksep=0;
+;; ;; ;;DOT // rankdir=LR;
+;; ;; ;;DOT node [shape="box"];
+;; ;; ;;DOT "rmt:send-receive" -> MUTEXLOCK;
+;; ;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
+;; ;; ;; do all the prep locked under the rmt-mutex
+;; ;; (mutex-lock! *rmt-mutex*)
+;; ;;
+;; ;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
+;; ;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
+;; ;; ;; 3. do the query, if on homehost use local access
+;; ;; ;;
+;; ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
+;; ;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
+;; ;; (runremote (or area-dat
+;; ;; *runremote*))
+;; ;; (attemptnum (+ 1 attemptnum))
+;; ;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
+;; ;;
+;; ;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
+;; ;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
+;; ;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
+;; ;; ;; ensure we have a record for our connection for given area
+;; ;; (if (not runremote) ;; can remove this one. should never get here.
+;; ;; (begin
+;; ;; (set! *runremote* (make-remote))
+;; ;; (let* ((server-info (remote-server-info *runremote*)))
+;; ;; (if server-info
+;; ;; (begin
+;; ;; (remote-server-url-set! *runremote* (server:record->url server-info))
+;; ;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
+;; ;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
+;; ;;
+;; ;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
+;; ;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
+;; ;; ;; DOT SET_HOMEHOST -> MUTEXLOCK;
+;; ;; ;; ensure we have a homehost record
+;; ;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
+;; ;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
+;; ;; (let ((hh-data (server:choose-server areapath 'homehost)))
+;; ;; (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
+;; ;;
+;; ;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
+;; ;; (cond
+;; ;; #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
+;; ;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
+;; ;; (set! *runremote* #f)
+;; ;; ;; BUG: close-connections should go here?
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
+;; ;;
+;; ;; ;;DOT EXIT;
+;; ;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
+;; ;; ;; give up if more than 150 attempts
+;; ;; ((> attemptnum 150)
+;; ;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
+;; ;; (exit 1))
+;; ;;
+;; ;; ;;DOT CASE2 [label="local\nreadonly\nquery"];
+;; ;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
+;; ;; ;;DOT CASE2 -> "rmt:open-qry-close-locally";
+;; ;; ;; readonly mode, read request- handle it - case 2
+;; ;; ((and readonly-mode
+;; ;; (member cmd api:read-only-queries))
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
+;; ;; (rmt:open-qry-close-locally cmd 0 params)
+;; ;; )
+;; ;;
+;; ;; ;;DOT CASE3 [label="write in\nread-only mode"];
+;; ;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
+;; ;; ;;DOT CASE3 -> "#f";
+;; ;; ;; readonly mode, write request. Do nothing, return #f
+;; ;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
+;; ;;
+;; ;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
+;; ;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
+;; ;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
+;; ;; ;;
+;; ;; ;;DOT CASE4 [label="reset\nconnection"];
+;; ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
+;; ;; ;;DOT CASE4 -> "rmt:send-receive";
+;; ;; ;; reset the connection if it has been unused too long
+;; ;; ((and runremote
+;; ;; ;; (remote-conndat runremote)
+;; ;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
+;; ;; (+ (remote-last-access runremote)
+;; ;; (remote-server-timeout runremote))))
+;; ;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
+;; ;; (http-transport:close-connections runremote)
+;; ;; ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
+;; ;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
+;; ;;
+;; ;; ;;DOT CASE5 [label="local\nread"];
+;; ;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
+;; ;; ;;DOT CASE5 -> "rmt:open-qry-close-locally";
+;; ;;
+;; ;; ;; on homehost and this is a read
+;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; ;; (rmt:on-homehost? runremote)
+;; ;; (member cmd api:read-only-queries)) ;; this is a read
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
+;; ;; (rmt:open-qry-close-locally cmd 0 params))
+;; ;;
+;; ;; ;;DOT CASE6 [label="init\nremote"];
+;; ;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
+;; ;; ;;DOT CASE6 -> "rmt:send-receive";
+;; ;; ;; on homehost and this is a write, we already have a server, but server has died
+;; ;;
+;; ;; ;; reinstate this keep-alive section but inject a time condition into the (add ...
+;; ;; ;;
+;; ;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost
+;; ;; ;; (not (member cmd api:read-only-queries)) ;; this is a write
+;; ;; ;; (remote-server-url runremote) ;; have a server
+;; ;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
+;; ;; ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
+;; ;; ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
+;; ;; ;; (set! *runremote* (make-remote))
+;; ;; ;; (let* ((server-info (remote-server-info *runremote*)))
+;; ;; ;; (if server-info
+;; ;; ;; (begin
+;; ;; ;; (remote-server-url-set! *runremote* (server:record->url server-info))
+;; ;; ;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
+;; ;; ;; (remote-force-server-set! runremote (common:force-server?))
+;; ;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
+;; ;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
+;; ;;
+;; ;; ;;DOT CASE7 [label="homehost\nwrite"];
+;; ;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
+;; ;; ;;DOT CASE7 -> "rmt:open-qry-close-locally";
+;; ;; ;; on homehost and this is a write, we already have a server
+;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; ;; (cdr (remote-hh-dat runremote)) ;; on homehost
+;; ;; (not (member cmd api:read-only-queries)) ;; this is a write
+;; ;; (remote-server-url runremote)) ;; have a server
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
+;; ;; (rmt:open-qry-close-locally cmd 0 params))
+;; ;;
+;; ;; ;;DOT CASE8 [label="force\nserver"];
+;; ;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
+;; ;; ;;DOT CASE8 -> "rmt:open-qry-close-locally";
+;; ;; ;; on homehost, no server contact made and this is a write, passively start a server
+;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; ;; (cdr (remote-hh-dat runremote)) ;; have homehost
+;; ;; (not (remote-server-url runremote)) ;; no connection yet
+;; ;; (not (member cmd api:read-only-queries))) ;; not a read-only query
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
+;; ;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
+;; ;; (if server-info
+;; ;; (begin
+;; ;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
+;; ;; (remote-server-id-set! runremote (server:record->id server-info)))
+;; ;; (if (common:force-server?)
+;; ;; (server:start-and-wait *toppath*)
+;; ;; (server:kind-run *toppath*)))
+;; ;; (remote-force-server-set! runremote (common:force-server?))
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
+;; ;; (rmt:open-qry-close-locally cmd 0 params)))
+;; ;;
+;; ;; ;;DOT CASE9 [label="force server\nnot on homehost"];
+;; ;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
+;; ;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
+;; ;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
+;; ;; (not (remote-conndat runremote)))
+;; ;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
+;; ;; (not (remote-conndat runremote)))) ;; and no connection
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
+;; ;; (server:start-and-wait *toppath*))
+;; ;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
+;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
+;; ;;
+;; ;; ;;DOT CASE10 [label="on homehost"];
+;; ;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
+;; ;; ;;DOT CASE10 -> "rmt:open-qry-close-locally";
+;; ;; ;; all set up if get this far, dispatch the query
+;; ;; ((and (not (remote-force-server runremote))
+;; ;; (cdr (remote-hh-dat runremote))) ;; we are on homehost
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
+;; ;; (rmt:open-qry-close-locally cmd (if rid rid 0) params))
+;; ;;
+;; ;; ;;DOT CASE11 [label="send_receive"];
+;; ;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
+;; ;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
+;; ;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
+;; ;; ;; not on homehost, do server query
+;; ;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
+;; ;; ;;DOT }
+;; ;;
+;; ;; ;; bunch of small functions factored out of send-receive to make debug easier
+;; ;; ;;
+;; ;;
+;; ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
+;; ;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
+;; ;; ;; (mutex-lock! *rmt-mutex*)
+;; ;; (let* ((conninfo (remote-conndat runremote))
+;; ;; (dat-in (condition-case ;; handling here has
+;; ;; ;; caused a lot of
+;; ;; ;; problems. However it
+;; ;; ;; is needed to deal with
+;; ;; ;; attemtped
+;; ;; ;; communication to
+;; ;; ;; servers that have gone
+;; ;; ;; away
+;; ;; (http-transport:client-api-send-receive 0 runremote cmd params)
+;; ;; ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
+;; ;; ((servermismatch) (vector #f "Server id mismatch" ))
+;; ;; ((commfail)(vector #f "communications fail"))
+;; ;; ((exn)(vector #f "other fail" (print-call-chain)))))
+;; ;; (dat (if (and (vector? dat-in) ;; ... check it is a correct size
+;; ;; (> (vector-length dat-in) 1))
+;; ;; dat-in
+;; ;; (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
+;; ;; (success (if (vector? dat) (vector-ref dat 0) #f))
+;; ;; (res (if (vector? dat) (vector-ref dat 1) #f)))
+;; ;; (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
+;; ;; (remote-last-access-set! runremote (current-seconds)) ;; refresh access time
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
+;; ;; (set! conninfo #f)
+;; ;; (http-transport:close-connections runremote)))
+;; ;; (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (if success ;; success only tells us that the transport was
+;; ;; ;; successful, have to examine the data to see if
+;; ;; ;; there was a detected issue at the other end
+;; ;; (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
+;; ;; (begin
+;; ;; (debug:print-error 0 *default-log-port* " dat=" dat)
+;; ;; (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
+;; ;; )))
+;;
+;; (define (rmt:print-db-stats)
+;; (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+;; (debug:print 18 *default-log-port* "DB Stats\n========")
+;; (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+;; (for-each (lambda (cmd)
+;; (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+;; (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+;; (sort (hash-table-keys *db-stats*)
+;; (lambda (a b)
+;; (> (vector-ref (hash-table-ref *db-stats* a) 0)
+;; (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+;;
+;; (define (rmt:get-max-query-average run-id)
+;; (mutex-lock! *db-stats-mutex*)
+;; (let* ((runkey (conc "run-id=" run-id " "))
+;; (cmds (filter (lambda (x)
+;; (substring-index runkey x))
+;; (hash-table-keys *db-stats*)))
+;; (res (if (null? cmds)
+;; (cons 'none 0)
+;; (let loop ((cmd (car cmds))
+;; (tal (cdr cmds))
+;; (max-cmd (car cmds))
+;; (res 0))
+;; (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+;; (tot (vector-ref cmd-dat 0))
+;; (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+;; (currmax (max res curravg))
+;; (newmax-cmd (if (> curravg res) cmd max-cmd)))
+;; (if (null? tal)
+;; (if (> tot 10)
+;; (cons newmax-cmd currmax)
+;; (cons 'none 0))
+;; (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+;; (mutex-unlock! *db-stats-mutex*)
+;; res))
+;;
+;; (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
+;; (let* ((qry-is-write (not (member cmd api:read-only-queries)))
+;; (db-file-path (db:dbfile-path)) ;; 0))
+;; (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
+;; (read-only (not (file-write-access? db-file-path)))
+;; (start (current-milliseconds))
+;; (resdat (if (not (and read-only qry-is-write))
+;; (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
+;; ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
+;; ;; exn ;; This is an attempt to detect that situation and recover gracefully
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
+;; ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
+;; (if (and (vector? v)
+;; (> (vector-length v) 1))
+;; (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
+;; newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
+;; (vector #t '()))) ;; ) ;; we could also check that the returned types are valid
+;; (vector #t '())))
+;; (success (vector-ref resdat 0))
+;; (res (vector-ref resdat 1))
+;; (duration (- (current-milliseconds) start)))
+;; (if (and read-only qry-is-write)
+;; (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
+;; (if (not success)
+;; (if (> remretries 0)
+;; (begin
+;; (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
+;; (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
+;; (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
+;; (begin
+;; (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
+;; #f))
+;; (begin
+;; ;; (rmt:update-db-stats run-id cmd params duration)
+;; ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
+;; (if qry-is-write
+;; (let ((start-time (current-seconds)))
+;; (mutex-lock! *db-multi-sync-mutex*)
+;; / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
+;; (mutex-unlock! *db-multi-sync-mutex*)))))
+;; res))
+;;
+;; (define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
+;; (let* ((run-id (if run-id run-id 0))
+;; (res (http-transport:client-api-send-receive run-id runremote cmd params)))
+;; (if (and res (vector-ref res 0))
+;; (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
+;; #f)))
+;;
+;; ;;======================================================================
+;; ;;
+;; ;; A C T U A L A P I C A L L S
+;; ;;
+;; ;;======================================================================
+;;
+;; ;;======================================================================
+;; ;; S E R V E R
+;; ;;======================================================================
+;;
+;; (define (rmt:kill-server run-id)
+;; (rmt:send-receive 'kill-server run-id (list run-id)))
+;;
+;; (define (rmt:start-server run-id)
+;; (rmt:send-receive 'start-server 0 (list run-id)))
+;;
+;; ;;======================================================================
+;; ;; M I S C
+;; ;;======================================================================
+;;
+;; (define (rmt:login run-id)
+;; (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))
+;;
+;; ;; This login does no retries under the hood - it acts a bit like a ping.
+;; ;; Deprecated for nmsg-transport.
+;; ;;
+;; (define (rmt:login-no-auto-client-setup runremote)
+;; (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
+;;
+;; ;; hand off a call to one of the db:queries statements
+;; ;; added run-id to make looking up the correct db possible
+;; ;;
+;; (define (rmt:general-call stmtname run-id . params)
+;; (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
+;;
+;;
+;; ;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
+;; (define (rmt:get-latest-host-load hostname)
+;; (rmt:send-receive 'get-latest-host-load 0 (list hostname)))
+;;
+;; (define (rmt:sdb-qry qry val run-id)
+;; ;; add caching if qry is 'getid or 'getstr
+;; (rmt:send-receive 'sdb-qry run-id (list qry val)))
+;;
+;; ;; NOT COMPLETED
+;; (define (rmt:runtests user run-id testpatt params)
+;; (rmt:send-receive 'runtests run-id testpatt))
+;;
+;; (define (rmt:get-run-record-ids target run keynames test-patt)
+;; (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))
+;;
+;; (define (rmt:get-changed-record-ids since-time)
+;; (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )
+;;
+;; (define (rmt:drop-all-triggers)
+;; (rmt:send-receive 'drop-all-triggers #f '()))
+;;
+;; (define (rmt:create-all-triggers)
+;; (rmt:send-receive 'create-all-triggers #f '()))
+;;
+;; ;;======================================================================
+;; ;; T E S T M E T A
+;; ;;======================================================================
+;;
+;; (define (rmt:get-tests-tags)
+;; (rmt:send-receive 'get-tests-tags #f '()))
+;;
+;; ;;======================================================================
+;; ;; K E Y S
+;; ;;======================================================================
+;;
+;; ;; These require run-id because the values come from the run!
+;; ;;
+;; (define (rmt:get-key-val-pairs run-id)
+;; (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
+;;
+;; (define (rmt:get-keys)
+;; (if *db-keys* *db-keys*
+;; (let ((res (rmt:send-receive 'get-keys #f '())))
+;; (set! *db-keys* res)
+;; res)))
+;;
+;; (define (rmt:get-keys-write) ;; dummy query to force server start
+;; (let ((res (rmt:send-receive 'get-keys-write #f '())))
+;; (set! *db-keys* res)
+;; res))
+;;
+;; ;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
+;; ;; to cache the resuls in a hash
+;; ;;
+;; (define (rmt:get-key-vals run-id)
+;; (or (hash-table-ref/default *keyvals* run-id #f)
+;; (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
+;; (hash-table-set! *keyvals* run-id res)
+;; res)))
+;;
+;; (define (rmt:get-targets)
+;; (rmt:send-receive 'get-targets #f '()))
+;;
+;; (define (rmt:get-target run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-target run-id (list run-id)))
+;;
+;; (define (rmt:get-run-times runpatt targetpatt)
+;; (rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
+;;
+;;
+;; ;;======================================================================
+;; ;; T E S T S
+;; ;;======================================================================
+;;
+;; ;; Just some syntatic sugar
+;; (define (rmt:register-test run-id test-name item-path)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:general-call 'register-test run-id run-id test-name item-path))
+;;
+;; (define (rmt:get-test-id run-id testname item-path)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
+;;
+;; ;; run-id is NOT used
+;; ;;
+;; (define (rmt:get-test-info-by-id run-id test-id)
+;; (if (number? test-id)
+;; (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
+;; (print-call-chain (current-error-port))
+;; #f)))
+;;
+;; (define (rmt:test-get-rundir-from-test-id run-id test-id)
+;; (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
+;;
+;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (let* ((test-path (if (string? work-area)
+;; work-area
+;; (rmt:test-get-rundir-from-test-id run-id test-id))))
+;; (debug:print 3 *default-log-port* "TEST PATH: " test-path)
+;; (open-test-db test-path)))
+;;
+;; ;; WARNING: This currently bypasses the transaction wrapped writes system
+;; (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
+;;
+;; (define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
+;;
+;; (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; ;; (if (number? run-id)
+;; (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
+;; ;; (begin
+;; ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
+;; ;; (print-call-chain (current-error-port))
+;; ;; '())))
+;;
+;; (define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
+;;
+;; ;; get stuff via synchash
+;; (define (rmt:synchash-get run-id proc synckey keynum params)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
+;;
+;; (define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
+;;
;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
;; ;;
;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
-;; (let ((run-id-list (if run-ids
+;; (let ((multi-run-mutex (make-mutex))
+;; (run-id-list (if run-ids
;; run-ids
-;; (rmt:get-all-run-ids))))
-;; (apply append (map (lambda (run-id)
-;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
-;; run-id-list))))
-
-(define (rmt:delete-test-records run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
-
-(define (rmt:test-set-state-status run-id test-id state status msg)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
-
-(define (rmt:test-toplevel-num-items run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
-
-;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
-;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
-
-(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
-
-(define (rmt:test-get-logfile-info run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
-
-(define (rmt:test-get-records-for-index-file run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
-
-(define (rmt:get-testinfo-state-status run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
-
-(define (rmt:test-set-log! run-id test-id logf)
- (assert (number? run-id) "FATAL: Run id required.")
- (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
-
-(define (rmt:test-set-top-process-pid run-id test-id pid)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
-
-(define (rmt:test-get-top-process-pid run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
-
-(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
- (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
-
-;; NOTE: This will open and access ALL run databases.
-;;
-(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
- (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
- (apply append
- (map (lambda (run-id)
- (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
- run-ids))))
-
-(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
-
-(define (rmt:get-count-tests-running-for-run-id run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
-
-(define (rmt:get-not-completed-cnt run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
-
-
-;; Statistical queries
-
-(define (rmt:get-count-tests-running run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
-
-(define (rmt:get-count-tests-running-for-testname run-id testname)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
-
-(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
-
-;; state and status are extra hints not usually used in the calculation
-;;
-(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
-
-(define (rmt:set-state-status-and-roll-up-run run-id state status)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
-
-
-(define (rmt:update-pass-fail-counts run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
-
-(define (rmt:top-test-set-per-pf-counts run-id test-name)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
-
-(define (rmt:get-raw-run-stats run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
-
-(define (rmt:get-test-times runname target)
- (rmt:send-receive 'get-test-times #f (list runname target )))
-
-;;======================================================================
-;; R U N S
-;;======================================================================
-
-;; BUG - LOOK AT HOW THIS WORKS!!!
-;;
-(define (rmt:get-run-info run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-info #f (list run-id)))
-
-(define (rmt:get-num-runs runpatt)
- (rmt:send-receive 'get-num-runs #f (list runpatt)))
-
-(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
- (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys)))
-
-;; Use the special run-id == #f scenario here since there is no run yet
-(define (rmt:register-run keyvals runname state status user contour)
- (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
-
-(define (rmt:get-run-name-from-id run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-name-from-id #f (list run-id)))
-
-(define (rmt:delete-run run-id)
- (rmt:send-receive 'delete-run #f (list run-id)))
-
-(define (rmt:update-run-stats run-id stats)
- (rmt:send-receive 'update-run-stats #f (list run-id stats)))
-
-(define (rmt:delete-old-deleted-test-records)
- (rmt:send-receive 'delete-old-deleted-test-records #f '()))
-
-(define (rmt:get-runs runpatt count offset keypatts)
- (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
-
-(define (rmt:simple-get-runs runpatt count offset target last-update)
- (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update)))
-
-(define (rmt:get-all-run-ids)
- (rmt:send-receive 'get-all-run-ids #f '()))
-
-(define (rmt:get-prev-run-ids run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
-
-(define (rmt:lock/unlock-run run-id lock unlock user)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
-
-;; set/get status
-(define (rmt:get-run-status run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-status #f (list run-id)))
-
-(define (rmt:get-run-state run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-run-state #f (list run-id)))
-
-
-(define (rmt:set-run-status run-id run-status #!key (msg #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
-
-(define (rmt:set-run-state-status run-id state status )
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'set-run-state-status #f (list run-id state status)))
-
-(define (rmt:update-tesdata-on-repilcate-db old-lt new-lt)
-(rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt)))
-
-(define (rmt:update-run-event_time run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'update-run-event_time #f (list run-id)))
-
-(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default
- (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
-
-(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
- (assert (number? run-id) "FATAL: Run id required.")
- ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
- (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
-
-(define (rmt:get-main-run-stats run-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-main-run-stats #f (list run-id)))
-
-(define (rmt:get-var varname)
- (rmt:send-receive 'get-var #f (list varname)))
-
-(define (rmt:del-var varname)
- (rmt:send-receive 'del-var #f (list varname)))
-
-(define (rmt:set-var varname value)
- (rmt:send-receive 'set-var #f (list varname value)))
-
-(define (rmt:inc-var varname)
- (rmt:send-receive 'inc-var #f (list varname)))
-
-(define (rmt:dec-var varname)
- (rmt:send-receive 'dec-var #f (list varname)))
-
-(define (rmt:add-var varname value)
- (rmt:send-receive 'add-var #f (list varname value)))
-
-;;======================================================================
-;; M U L T I R U N Q U E R I E S
-;;======================================================================
-
-;; Need to move this to multi-run section and make associated changes
-(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
- (let ((run-ids (rmt:get-all-run-ids)))
- (for-each (lambda (run-id)
- (rmt:find-and-mark-incomplete run-id ovr-deadtime))
- run-ids)))
-
-;; get the previous record for when this test was run where all keys match but runname
-;; returns #f if no such test found, returns a single test record if found
-;;
-;; Run this at the client end since we have to connect to multiple run-id dbs
-;;
-(define (rmt:get-previous-test-run-record run-id test-name item-path)
- (let* ((keyvals (rmt:get-key-val-pairs run-id))
- (keys (rmt:get-keys))
- (selstr (string-intersperse keys ","))
- (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
- (if (not keyvals)
- #f
- (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
- ;; for each run starting with the most recent look to see if there is a matching test
- ;; if found then return that matching test record
- (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
- (if (null? prev-run-ids) #f
- (let loop ((hed (car prev-run-ids))
- (tal (cdr prev-run-ids)))
- (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
- #f #f #f ;; offset limit not-in hide/not-hide
- #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
- (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
- (if (and (null? results)
- (not (null? tal)))
- (loop (car tal)(cdr tal))
- (if (null? results) #f
- (car results))))))))))
-
-(define (rmt:get-run-stats)
- (rmt:send-receive 'get-run-stats #f '()))
-
-;;======================================================================
-;; S T E P S
-;;======================================================================
-
-;; Getting steps is more complicated.
-;;
-;; If given work area
-;; 1. Find the testdat.db file
-;; 2. Open the testdat.db file and do the query
-;; If not given the work area
-;; 1. Do a remote call to get the test path
-;; 2. Continue as above
-;;
-;;(define (rmt:get-steps-for-test run-id test-id)
-;; (rmt:send-receive 'get-steps-data run-id (list test-id)))
-
-(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
- (assert (number? run-id) "FATAL: Run id required.")
- (let* ((state (items:check-valid-items "state" state-in))
- (status (items:check-valid-items "status" status-in)))
- (if (or (not state)(not status))
- (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
- " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
- (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
-
-
-(define (rmt:delete-steps-for-test! run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))
-
-(define (rmt:get-steps-for-test run-id test-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
-
-(define (rmt:get-steps-info-by-id run-id test-step-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id)))
-
-;;======================================================================
-;; T E S T D A T A
-;;======================================================================
-
-(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
-
-(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f))
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt)))
-
-(define (rmt:get-data-info-by-id run-id test-data-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id)))
-
-(define (rmt:testmeta-add-record testname)
- (rmt:send-receive 'testmeta-add-record #f (list testname)))
-
-(define (rmt:testmeta-get-record testname)
- (rmt:send-receive 'testmeta-get-record #f (list testname)))
-
-(define (rmt:testmeta-update-field test-name fld val)
- (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
-
-(define (rmt:test-data-rollup run-id test-id status)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
-
-(define (rmt:csv->test-data run-id test-id csvdata)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
-
-;;======================================================================
-;; T A S K S
-;;======================================================================
-
-(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
- (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))
-
-(define (rmt:tasks-add action owner target runname testpatt params)
- (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
-
-(define (rmt:tasks-set-state-given-param-key param-key new-state)
- (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
-
-(define (rmt:tasks-get-last target runname)
- (rmt:send-receive 'tasks-get-last #f (list target runname)))
-
-;;======================================================================
-;; N O S Y N C D B
-;;======================================================================
-
-(define (rmt:no-sync-set var val)
- (rmt:send-receive 'no-sync-set #f `(,var ,val)))
-
-(define (rmt:no-sync-get/default var default)
- (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
-
-(define (rmt:no-sync-del! var)
- (rmt:send-receive 'no-sync-del! #f `(,var)))
-
-(define (rmt:no-sync-get-lock keyname)
- (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
-
-;;======================================================================
-;; A R C H I V E S
-;;======================================================================
-
-(define (rmt:archive-get-allocations testname itempath dneeded)
- (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
-
-(define (rmt:archive-register-block-name bdisk-id archive-path)
- (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))
-
-(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
- (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey)))
-
-(define (rmt:archive-register-disk bdisk-name bdisk-path df)
- (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
-
-(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
- (assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
-
-(define (rmt:test-get-archive-block-info archive-block-id)
- (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
-
-
-(define (rmtmod:calc-ro-mode runremote *toppath*)
- (if (and runremote
- (remote-ro-mode-checked runremote))
- (remote-ro-mode runremote)
- (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
- (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
- (if runremote
- (begin
- (remote-ro-mode-set! runremote ro-mode)
- (remote-ro-mode-checked-set! runremote #t)
- ro-mode)
- ro-mode))))
-
-(define (extras-readonly-mode rmt-mutex log-port cmd params)
- (mutex-unlock! rmt-mutex)
- (debug:print-info 12 log-port "rmt:send-receive, case 3")
- (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
- #f)
-
-(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
- (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
- (mutex-lock! *rmt-mutex*)
- (http-transport:close-connections runremote)
- (remote-server-url-set! runremote #f)
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
- (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
-
-(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
- (if (and (vector? res)
- (eq? (vector-length res) 2)
- (eq? (vector-ref res 1) 'overloaded)) ;; since we are
- ;; looking at the
- ;; data to carry the
- ;; error we'll use a
- ;; fairly obtuse
- ;; combo to minimise
- ;; the chances of
- ;; some sort of
- ;; collision. this
- ;; is the case where
- ;; the returned data
- ;; is bad or the
- ;; server is
- ;; overloaded and we
- ;; want to ease off
- ;; the queries
- (let ((wait-delay (+ attemptnum (* attemptnum 10))))
- (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
- (mutex-lock! *rmt-mutex*)
- (http-transport:close-connections runremote)
- (set! *runremote* #f) ;; force starting over
- (mutex-unlock! *rmt-mutex*)
- (thread-sleep! wait-delay)
- (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
- res)) ;; All good, return res
-
-#;(set-functions rmt:send-receive remote-server-url-set!
- http-transport:close-connections remote-conndat-set!
- debug:print debug:print-info
- remote-ro-mode remote-ro-mode-set!
- remote-ro-mode-checked-set! remote-ro-mode-checked)
+;; (rmt:get-all-run-ids)))
+;; (result '()))
+;; (if (null? run-id-list)
+;; '()
+;; (let loop ((hed (car run-id-list))
+;; (tal (cdr run-id-list))
+;; (threads '()))
+;; (if (> (length threads) 5)
+;; (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
+;; (let* ((newthread (make-thread
+;; (lambda ()
+;; (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
+;; (if (list? res)
+;; (begin
+;; (mutex-lock! multi-run-mutex)
+;; (set! result (append result res))
+;; (mutex-unlock! multi-run-mutex))
+;; (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
+;; (conc "multi-run-thread for run-id " hed)))
+;; (newthreads (cons newthread threads)))
+;; (thread-start! newthread)
+;; (thread-sleep! 0.05) ;; give that thread some time to start
+;; (if (null? tal)
+;; newthreads
+;; (loop (car tal)(cdr tal) newthreads))))))
+;; result))
+;;
+;; ;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
+;; ;; ;;
+;; ;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
+;; ;; (let ((run-id-list (if run-ids
+;; ;; run-ids
+;; ;; (rmt:get-all-run-ids))))
+;; ;; (apply append (map (lambda (run-id)
+;; ;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
+;; ;; run-id-list))))
+;;
+;; (define (rmt:delete-test-records run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
+;;
+;; (define (rmt:test-set-state-status run-id test-id state status msg)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
+;;
+;; (define (rmt:test-toplevel-num-items run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
+;;
+;; ;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
+;; ;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
+;;
+;; (define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
+;;
+;; (define (rmt:test-get-logfile-info run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
+;;
+;; (define (rmt:test-get-records-for-index-file run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
+;;
+;; (define (rmt:get-testinfo-state-status run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
+;;
+;; (define (rmt:test-set-log! run-id test-id logf)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
+;;
+;; (define (rmt:test-set-top-process-pid run-id test-id pid)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
+;;
+;; (define (rmt:test-get-top-process-pid run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
+;;
+;; (define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
+;; (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
+;;
+;; ;; NOTE: This will open and access ALL run databases.
+;; ;;
+;; (define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
+;; (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
+;; (apply append
+;; (map (lambda (run-id)
+;; (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
+;; run-ids))))
+;;
+;; (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
+;;
+;; (define (rmt:get-count-tests-running-for-run-id run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
+;;
+;; (define (rmt:get-not-completed-cnt run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
+;;
+;;
+;; ;; Statistical queries
+;;
+;; (define (rmt:get-count-tests-running run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
+;;
+;; (define (rmt:get-count-tests-running-for-testname run-id testname)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
+;;
+;; (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
+;;
+;; ;; state and status are extra hints not usually used in the calculation
+;; ;;
+;; (define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
+;;
+;; (define (rmt:set-state-status-and-roll-up-run run-id state status)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
+;;
+;;
+;; (define (rmt:update-pass-fail-counts run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
+;;
+;; (define (rmt:top-test-set-per-pf-counts run-id test-name)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
+;;
+;; (define (rmt:get-raw-run-stats run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
+;;
+;; (define (rmt:get-test-times runname target)
+;; (rmt:send-receive 'get-test-times #f (list runname target )))
+;;
+;; ;;======================================================================
+;; ;; R U N S
+;; ;;======================================================================
+;;
+;; ;; BUG - LOOK AT HOW THIS WORKS!!!
+;; ;;
+;; (define (rmt:get-run-info run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-info #f (list run-id)))
+;;
+;; (define (rmt:get-num-runs runpatt)
+;; (rmt:send-receive 'get-num-runs #f (list runpatt)))
+;;
+;; (define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
+;; (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys)))
+;;
+;; ;; Use the special run-id == #f scenario here since there is no run yet
+;; (define (rmt:register-run keyvals runname state status user contour)
+;; (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
+;;
+;; (define (rmt:get-run-name-from-id run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-name-from-id #f (list run-id)))
+;;
+;; (define (rmt:delete-run run-id)
+;; (rmt:send-receive 'delete-run #f (list run-id)))
+;;
+;; (define (rmt:update-run-stats run-id stats)
+;; (rmt:send-receive 'update-run-stats #f (list run-id stats)))
+;;
+;; (define (rmt:delete-old-deleted-test-records)
+;; (rmt:send-receive 'delete-old-deleted-test-records #f '()))
+;;
+;; (define (rmt:get-runs runpatt count offset keypatts)
+;; (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
+;;
+;; (define (rmt:simple-get-runs runpatt count offset target last-update)
+;; (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update)))
+;;
+;; (define (rmt:get-all-run-ids)
+;; (rmt:send-receive 'get-all-run-ids #f '()))
+;;
+;; (define (rmt:get-prev-run-ids run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
+;;
+;; (define (rmt:lock/unlock-run run-id lock unlock user)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
+;;
+;; ;; set/get status
+;; (define (rmt:get-run-status run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-status #f (list run-id)))
+;;
+;; (define (rmt:get-run-state run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-run-state #f (list run-id)))
+;;
+;;
+;; (define (rmt:set-run-status run-id run-status #!key (msg #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
+;;
+;; (define (rmt:set-run-state-status run-id state status )
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'set-run-state-status #f (list run-id state status)))
+;;
+;; (define (rmt:update-tesdata-on-repilcate-db old-lt new-lt)
+;; (rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt)))
+;;
+;; (define (rmt:update-run-event_time run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'update-run-event_time #f (list run-id)))
+;;
+;; (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default
+;; (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
+;;
+;; (define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
+;; (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
+;;
+;; (define (rmt:get-main-run-stats run-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-main-run-stats #f (list run-id)))
+;;
+;; (define (rmt:get-var varname)
+;; (rmt:send-receive 'get-var #f (list varname)))
+;;
+;; (define (rmt:del-var varname)
+;; (rmt:send-receive 'del-var #f (list varname)))
+;;
+;; (define (rmt:set-var varname value)
+;; (rmt:send-receive 'set-var #f (list varname value)))
+;;
+;; (define (rmt:inc-var varname)
+;; (rmt:send-receive 'inc-var #f (list varname)))
+;;
+;; (define (rmt:dec-var varname)
+;; (rmt:send-receive 'dec-var #f (list varname)))
+;;
+;; (define (rmt:add-var varname value)
+;; (rmt:send-receive 'add-var #f (list varname value)))
+;;
+;; ;;======================================================================
+;; ;; M U L T I R U N Q U E R I E S
+;; ;;======================================================================
+;;
+;; ;; Need to move this to multi-run section and make associated changes
+;; (define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
+;; (let ((run-ids (rmt:get-all-run-ids)))
+;; (for-each (lambda (run-id)
+;; (rmt:find-and-mark-incomplete run-id ovr-deadtime))
+;; run-ids)))
+;;
+;; ;; get the previous record for when this test was run where all keys match but runname
+;; ;; returns #f if no such test found, returns a single test record if found
+;; ;;
+;; ;; Run this at the client end since we have to connect to multiple run-id dbs
+;; ;;
+;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
+;; (let* ((keyvals (rmt:get-key-val-pairs run-id))
+;; (keys (rmt:get-keys))
+;; (selstr (string-intersperse keys ","))
+;; (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
+;; (if (not keyvals)
+;; #f
+;; (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
+;; ;; for each run starting with the most recent look to see if there is a matching test
+;; ;; if found then return that matching test record
+;; (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
+;; (if (null? prev-run-ids) #f
+;; (let loop ((hed (car prev-run-ids))
+;; (tal (cdr prev-run-ids)))
+;; (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
+;; #f #f #f ;; offset limit not-in hide/not-hide
+;; #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
+;; (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
+;; (if (and (null? results)
+;; (not (null? tal)))
+;; (loop (car tal)(cdr tal))
+;; (if (null? results) #f
+;; (car results))))))))))
+;;
+;; (define (rmt:get-run-stats)
+;; (rmt:send-receive 'get-run-stats #f '()))
+;;
+;; ;;======================================================================
+;; ;; S T E P S
+;; ;;======================================================================
+;;
+;; ;; Getting steps is more complicated.
+;; ;;
+;; ;; If given work area
+;; ;; 1. Find the testdat.db file
+;; ;; 2. Open the testdat.db file and do the query
+;; ;; If not given the work area
+;; ;; 1. Do a remote call to get the test path
+;; ;; 2. Continue as above
+;; ;;
+;; ;;(define (rmt:get-steps-for-test run-id test-id)
+;; ;; (rmt:send-receive 'get-steps-data run-id (list test-id)))
+;;
+;; (define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (let* ((state (items:check-valid-items "state" state-in))
+;; (status (items:check-valid-items "status" status-in)))
+;; (if (or (not state)(not status))
+;; (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
+;; " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
+;; (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
+;;
+;;
+;; (define (rmt:delete-steps-for-test! run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))
+;;
+;; (define (rmt:get-steps-for-test run-id test-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
+;;
+;; (define (rmt:get-steps-info-by-id run-id test-step-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id)))
+;;
+;; ;;======================================================================
+;; ;; T E S T D A T A
+;; ;;======================================================================
+;;
+;; (define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
+;;
+;; (define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f))
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt)))
+;;
+;; (define (rmt:get-data-info-by-id run-id test-data-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id)))
+;;
+;; (define (rmt:testmeta-add-record testname)
+;; (rmt:send-receive 'testmeta-add-record #f (list testname)))
+;;
+;; (define (rmt:testmeta-get-record testname)
+;; (rmt:send-receive 'testmeta-get-record #f (list testname)))
+;;
+;; (define (rmt:testmeta-update-field test-name fld val)
+;; (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
+;;
+;; (define (rmt:test-data-rollup run-id test-id status)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
+;;
+;; (define (rmt:csv->test-data run-id test-id csvdata)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
+;;
+;; ;;======================================================================
+;; ;; T A S K S
+;; ;;======================================================================
+;;
+;; (define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
+;; (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))
+;;
+;; (define (rmt:tasks-add action owner target runname testpatt params)
+;; (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
+;;
+;; (define (rmt:tasks-set-state-given-param-key param-key new-state)
+;; (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
+;;
+;; (define (rmt:tasks-get-last target runname)
+;; (rmt:send-receive 'tasks-get-last #f (list target runname)))
+;;
+;; ;;======================================================================
+;; ;; N O S Y N C D B
+;; ;;======================================================================
+;;
+;; (define (rmt:no-sync-set var val)
+;; (rmt:send-receive 'no-sync-set #f `(,var ,val)))
+;;
+;; (define (rmt:no-sync-get/default var default)
+;; (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
+;;
+;; (define (rmt:no-sync-del! var)
+;; (rmt:send-receive 'no-sync-del! #f `(,var)))
+;;
+;; (define (rmt:no-sync-get-lock keyname)
+;; (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
+;;
+;; ;;======================================================================
+;; ;; A R C H I V E S
+;; ;;======================================================================
+;;
+;; (define (rmt:archive-get-allocations testname itempath dneeded)
+;; (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
+;;
+;; (define (rmt:archive-register-block-name bdisk-id archive-path)
+;; (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))
+;;
+;; (define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
+;; (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey)))
+;;
+;; (define (rmt:archive-register-disk bdisk-name bdisk-path df)
+;; (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
+;;
+;; (define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
+;; (assert (number? run-id) "FATAL: Run id required.")
+;; (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
+;;
+;; (define (rmt:test-get-archive-block-info archive-block-id)
+;; (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
+;;
+;;
+;; (define (rmtmod:calc-ro-mode runremote *toppath*)
+;; (if (and runremote
+;; (remote-ro-mode-checked runremote))
+;; (remote-ro-mode runremote)
+;; (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
+;; (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+;; (if runremote
+;; (begin
+;; (remote-ro-mode-set! runremote ro-mode)
+;; (remote-ro-mode-checked-set! runremote #t)
+;; ro-mode)
+;; ro-mode))))
+;;
+;; (define (extras-readonly-mode rmt-mutex log-port cmd params)
+;; (mutex-unlock! rmt-mutex)
+;; (debug:print-info 12 log-port "rmt:send-receive, case 3")
+;; (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
+;; #f)
+;;
+;; (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
+;; (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
+;; (mutex-lock! *rmt-mutex*)
+;; (http-transport:close-connections runremote)
+;; (remote-server-url-set! runremote #f)
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
+;; (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
+;;
+;; (define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
+;; (if (and (vector? res)
+;; (eq? (vector-length res) 2)
+;; (eq? (vector-ref res 1) 'overloaded)) ;; since we are
+;; ;; looking at the
+;; ;; data to carry the
+;; ;; error we'll use a
+;; ;; fairly obtuse
+;; ;; combo to minimise
+;; ;; the chances of
+;; ;; some sort of
+;; ;; collision. this
+;; ;; is the case where
+;; ;; the returned data
+;; ;; is bad or the
+;; ;; server is
+;; ;; overloaded and we
+;; ;; want to ease off
+;; ;; the queries
+;; (let ((wait-delay (+ attemptnum (* attemptnum 10))))
+;; (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
+;; (mutex-lock! *rmt-mutex*)
+;; (http-transport:close-connections runremote)
+;; (set! *runremote* #f) ;; force starting over
+;; (mutex-unlock! *rmt-mutex*)
+;; (thread-sleep! wait-delay)
+;; (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
+;; res)) ;; All good, return res
+;;
+;; #;(set-functions rmt:send-receive remote-server-url-set!
+;; http-transport:close-connections remote-conndat-set!
+;; debug:print debug:print-info
+;; remote-ro-mode remote-ro-mode-set!
+;; remote-ro-mode-checked-set! remote-ro-mode-checked)
+;;
+;;
Index: rmtmod.scm
==================================================================
--- rmtmod.scm
+++ rmtmod.scm
@@ -18,68 +18,1088 @@
;;======================================================================
(declare (unit rmtmod))
(declare (uses commonmod))
-(declare (uses apimod))
-;; (declare (uses apimod.import))
-(declare (uses ulex))
-
-;; (include "ulex/ulex.scm")
+(declare (uses clientmod))
+(declare (uses dbmod))
(module rmtmod
- *
-
-(import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18)
-(import (prefix commonmod cmod:))
-(import apimod)
-(import (prefix ulex ulex:))
-
-(defstruct alldat
- (areapath #f)
- (ulexdat #f)
- )
-
-;;======================================================================
-;; return the handle struct for sending queries to a specific database
-;; - initializes the connection object if this is the first access
-;; - finds the "captain" and asks who to talk to for the given dbfname
-;; - establishes the connection to the current dbowner
-;;
-#;(define (rmt:connect alldat dbfname dbtype)
- (let* ((ulexdat (or (alldat-ulexdat alldat)
- (rmt:setup-ulex alldat))))
- (ulex:connect ulexdat dbfname dbtype)))
-
-;; setup the remote calls
-#;(define (rmt:setup-ulex alldat)
- (let* ((udata (ulex:setup))) ;; establish connection to ulex
- (alldat-ulexdat-set! alldat udata)
- ;; register all needed procs
- (ulex:register-handler udata 'ping cmod:get-full-version) ;; override ping with get-full-version
- (ulex:register-handler udata 'login cmod:get-full-version) ;; force setup of the connection
- (ulex:register-handler udata 'execute api:execute-requests)
- udata))
-
-;; set up a connection to the current owner of the dbfile associated with rid
-;; then send the query to that dbfile owner and wait for a response.
-;;
-#;(define (rmt:send-receive alldat cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
- (let* (;; (alldat *alldat*)
- (areapath (alldat-areapath alldat))
- (dbtype (if (or (not rid)(< rid 1)) ;; this is the criteria for "main.db"
- "main" "runs"))
- (dbfname (if (equal? dbtype "main")
- "main.db"
- (conc rid ".db")))
- (dbfile (conc areapath "/.db/" dbfname))
- (ulexconn (rmt:connect alldat dbfname dbtype)) ;; ulexconn is our new *runremote*, it is a dbowner struct < pdat lastrefresh >
- (udata (alldat-ulexdat alldat)))
- (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid params)))
- ;; need to call this on the other side
- ;; (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
-
- #;(with-input-from-string
- (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid (with-output-to-string (lambda ()(serialize params))))
- (lambda ()(deserialize)))
+*
+
+(import scheme
+ chicken
+ data-structures
+ posix
+ srfi-1
+ srfi-18
+ srfi-69
+ extras
+
+ clientmod
+ dbmod
+ )
+
+;;
+;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
+;;
+
+;; generate entries for ~/.megatestrc with the following
+;;
+;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
+
+;;======================================================================
+;; S U P P O R T F U N C T I O N S
+;;======================================================================
+
+;; if a server is either running or in the process of starting call client:setup
+;; else return #f to let the calling proc know that there is no server available
+;;
+(define (rmt:get-connection-info areapath) ;; TODO: push areapath down.
+ (if *runremote*
+ *runremote*
+ (begin
+ (set! *runremote* (client:find-server areapath))
+ (con-obj-to-str-set! *runremote* db:obj->str)
+ (con-host-set! *runremote* (get-host-name))
+ (con-pid-set! *runremote* (current-process-id))
+ *runremote*)))
+
+ #;(let* ((cinfo (if (remote? runremote)
+ (remote-conndat runremote)
+ #f)))
+ (if cinfo
+ cinfo
+ (if (server:check-if-running areapath)
+ (client:setup areapath runremote)
+ #f)))
+
+(define (rmt:on-homehost? runremote)
+ #t
+ #;(let* ((hh-dat (remote-hh-dat runremote)))
+ (if (pair? hh-dat)
+ (cdr hh-dat)
+ (begin
+ (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
+ #f))))
+
+
+;;======================================================================
+
+(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
+
+(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
+ (let* ((con (rmt:get-connection-info areapath)))
+ (client:send-receive con cmd params)))
+
+
+
+;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
+;;
+;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
+;; #;(common:telemetry-log (conc "rmt:"(->string cmd))
+;; payload: `((rid . ,rid)
+;; (params . ,params)))
+;;
+;; (if (> attemptnum 2)
+;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
+;;
+;; (cond
+;; ((> attemptnum 2) (thread-sleep! 0.05))
+;; ((> attemptnum 10) (thread-sleep! 0.5))
+;; ((> attemptnum 20) (thread-sleep! 1)))
+;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))
+;; (begin (server:run *toppath*) (thread-sleep! 3)))
+;;
+;;
+;; ;;DOT digraph megatest_state_status {
+;; ;;DOT ranksep=0;
+;; ;;DOT // rankdir=LR;
+;; ;;DOT node [shape="box"];
+;; ;;DOT "rmt:send-receive" -> MUTEXLOCK;
+;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
+;; ;; do all the prep locked under the rmt-mutex
+;; (mutex-lock! *rmt-mutex*)
+;;
+;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
+;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
+;; ;; 3. do the query, if on homehost use local access
+;; ;;
+;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
+;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
+;; (runremote (or area-dat
+;; *runremote*))
+;; (attemptnum (+ 1 attemptnum))
+;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
+;;
+;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
+;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
+;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
+;; ;; ensure we have a record for our connection for given area
+;; (if (not runremote) ;; can remove this one. should never get here.
+;; (begin
+;; (set! *runremote* (make-remote))
+;; (let* ((server-info (remote-server-info *runremote*)))
+;; (if server-info
+;; (begin
+;; (remote-server-url-set! *runremote* (server:record->url server-info))
+;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
+;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
+;;
+;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
+;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
+;; ;; DOT SET_HOMEHOST -> MUTEXLOCK;
+;; ;; ensure we have a homehost record
+;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
+;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
+;; (let ((hh-data (server:choose-server areapath 'homehost)))
+;; (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
+;;
+;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
+;; (cond
+;; #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
+;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
+;; (set! *runremote* #f)
+;; ;; BUG: close-connections should go here?
+;; (mutex-unlock! *rmt-mutex*)
+;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
+;;
+;; ;;DOT EXIT;
+;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
+;; ;; give up if more than 150 attempts
+;; ((> attemptnum 150)
+;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
+;; (exit 1))
+;;
+;; ;;DOT CASE2 [label="local\nreadonly\nquery"];
+;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
+;; ;;DOT CASE2 -> "rmt:open-qry-close-locally";
+;; ;; readonly mode, read request- handle it - case 2
+;; ((and readonly-mode
+;; (member cmd api:read-only-queries))
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
+;; (rmt:open-qry-close-locally cmd 0 params)
+;; )
+;;
+;; ;;DOT CASE3 [label="write in\nread-only mode"];
+;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
+;; ;;DOT CASE3 -> "#f";
+;; ;; readonly mode, write request. Do nothing, return #f
+;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
+;;
+;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
+;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
+;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
+;; ;;
+;; ;;DOT CASE4 [label="reset\nconnection"];
+;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
+;; ;;DOT CASE4 -> "rmt:send-receive";
+;; ;; reset the connection if it has been unused too long
+;; ((and runremote
+;; ;; (remote-conndat runremote)
+;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
+;; (+ (remote-last-access runremote)
+;; (remote-server-timeout runremote))))
+;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
+;; (http-transport:close-connections runremote)
+;; ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
+;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
+;; (mutex-unlock! *rmt-mutex*)
+;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
+;;
+;; ;;DOT CASE5 [label="local\nread"];
+;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
+;; ;;DOT CASE5 -> "rmt:open-qry-close-locally";
+;;
+;; ;; on homehost and this is a read
+;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; (rmt:on-homehost? runremote)
+;; (member cmd api:read-only-queries)) ;; this is a read
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
+;; (rmt:open-qry-close-locally cmd 0 params))
+;;
+;; ;;DOT CASE6 [label="init\nremote"];
+;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
+;; ;;DOT CASE6 -> "rmt:send-receive";
+;; ;; on homehost and this is a write, we already have a server, but server has died
+;;
+;; ;; reinstate this keep-alive section but inject a time condition into the (add ...
+;; ;;
+;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost
+;; ;; (not (member cmd api:read-only-queries)) ;; this is a write
+;; ;; (remote-server-url runremote) ;; have a server
+;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
+;; ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
+;; ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
+;; ;; (set! *runremote* (make-remote))
+;; ;; (let* ((server-info (remote-server-info *runremote*)))
+;; ;; (if server-info
+;; ;; (begin
+;; ;; (remote-server-url-set! *runremote* (server:record->url server-info))
+;; ;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
+;; ;; (remote-force-server-set! runremote (common:force-server?))
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
+;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
+;;
+;; ;;DOT CASE7 [label="homehost\nwrite"];
+;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
+;; ;;DOT CASE7 -> "rmt:open-qry-close-locally";
+;; ;; on homehost and this is a write, we already have a server
+;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; (cdr (remote-hh-dat runremote)) ;; on homehost
+;; (not (member cmd api:read-only-queries)) ;; this is a write
+;; (remote-server-url runremote)) ;; have a server
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
+;; (rmt:open-qry-close-locally cmd 0 params))
+;;
+;; ;;DOT CASE8 [label="force\nserver"];
+;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
+;; ;;DOT CASE8 -> "rmt:open-qry-close-locally";
+;; ;; on homehost, no server contact made and this is a write, passively start a server
+;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+;; (cdr (remote-hh-dat runremote)) ;; have homehost
+;; (not (remote-server-url runremote)) ;; no connection yet
+;; (not (member cmd api:read-only-queries))) ;; not a read-only query
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
+;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
+;; (if server-info
+;; (begin
+;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
+;; (remote-server-id-set! runremote (server:record->id server-info)))
+;; (if (common:force-server?)
+;; (server:start-and-wait *toppath*)
+;; (server:kind-run *toppath*)))
+;; (remote-force-server-set! runremote (common:force-server?))
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
+;; (rmt:open-qry-close-locally cmd 0 params)))
+;;
+;; ;;DOT CASE9 [label="force server\nnot on homehost"];
+;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
+;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
+;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
+;; (not (remote-conndat runremote)))
+;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
+;; (not (remote-conndat runremote)))) ;; and no connection
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
+;; (mutex-unlock! *rmt-mutex*)
+;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
+;; (server:start-and-wait *toppath*))
+;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
+;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
+;;
+;; ;;DOT CASE10 [label="on homehost"];
+;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
+;; ;;DOT CASE10 -> "rmt:open-qry-close-locally";
+;; ;; all set up if get this far, dispatch the query
+;; ((and (not (remote-force-server runremote))
+;; (cdr (remote-hh-dat runremote))) ;; we are on homehost
+;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
+;; (rmt:open-qry-close-locally cmd (if rid rid 0) params))
+;;
+;; ;;DOT CASE11 [label="send_receive"];
+;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
+;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
+;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
+;; ;; not on homehost, do server query
+;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
+;; ;;DOT }
+;;
+;; ;; bunch of small functions factored out of send-receive to make debug easier
+;; ;;
+;;
+;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
+;; ;; (mutex-unlock! *rmt-mutex*)
+;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
+;; ;; (mutex-lock! *rmt-mutex*)
+;; (let* ((conninfo (remote-conndat runremote))
+;; (dat-in (condition-case ;; handling here has
+;; ;; caused a lot of
+;; ;; problems. However it
+;; ;; is needed to deal with
+;; ;; attemtped
+;; ;; communication to
+;; ;; servers that have gone
+;; ;; away
+;; (http-transport:client-api-send-receive 0 runremote cmd params)
+;; ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
+;; ((servermismatch) (vector #f "Server id mismatch" ))
+;; ((commfail)(vector #f "communications fail"))
+;; ((exn)(vector #f "other fail" (print-call-chain)))))
+;; (dat (if (and (vector? dat-in) ;; ... check it is a correct size
+;; (> (vector-length dat-in) 1))
+;; dat-in
+;; (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
+;; (success (if (vector? dat) (vector-ref dat 0) #f))
+;; (res (if (vector? dat) (vector-ref dat 1) #f)))
+;; (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
+;; (remote-last-access-set! runremote (current-seconds)) ;; refresh access time
+;; (begin
+;; (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
+;; (set! conninfo #f)
+;; (http-transport:close-connections runremote)))
+;; (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
+;; (mutex-unlock! *rmt-mutex*)
+;; (if success ;; success only tells us that the transport was
+;; ;; successful, have to examine the data to see if
+;; ;; there was a detected issue at the other end
+;; (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
+;; (begin
+;; (debug:print-error 0 *default-log-port* " dat=" dat)
+;; (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
+;; )))
+
+(define (rmt:print-db-stats)
+ (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+ (debug:print 18 *default-log-port* "DB Stats\n========")
+ (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+ (for-each (lambda (cmd)
+ (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+ (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+ (sort (hash-table-keys *db-stats*)
+ (lambda (a b)
+ (> (vector-ref (hash-table-ref *db-stats* a) 0)
+ (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+(define (rmt:get-max-query-average run-id)
+ (mutex-lock! *db-stats-mutex*)
+ (let* ((runkey (conc "run-id=" run-id " "))
+ (cmds (filter (lambda (x)
+ (substring-index runkey x))
+ (hash-table-keys *db-stats*)))
+ (res (if (null? cmds)
+ (cons 'none 0)
+ (let loop ((cmd (car cmds))
+ (tal (cdr cmds))
+ (max-cmd (car cmds))
+ (res 0))
+ (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+ (tot (vector-ref cmd-dat 0))
+ (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+ (currmax (max res curravg))
+ (newmax-cmd (if (> curravg res) cmd max-cmd)))
+ (if (null? tal)
+ (if (> tot 10)
+ (cons newmax-cmd currmax)
+ (cons 'none 0))
+ (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+ (mutex-unlock! *db-stats-mutex*)
+ res))
+
+(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
+ (let* ((qry-is-write (not (member cmd api:read-only-queries)))
+ (db-file-path (db:dbfile-path)) ;; 0))
+ (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
+ (read-only (not (file-write-access? db-file-path)))
+ (start (current-milliseconds))
+ (resdat (if (not (and read-only qry-is-write))
+ (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
+ ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
+ ;; exn ;; This is an attempt to detect that situation and recover gracefully
+ ;; (begin
+ ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
+ ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
+ (if (and (vector? v)
+ (> (vector-length v) 1))
+ (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
+ newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
+ (vector #t '()))) ;; ) ;; we could also check that the returned types are valid
+ (vector #t '())))
+ (success (vector-ref resdat 0))
+ (res (vector-ref resdat 1))
+ (duration (- (current-milliseconds) start)))
+ (if (and read-only qry-is-write)
+ (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
+ (if (not success)
+ (if (> remretries 0)
+ (begin
+ (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
+ (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
+ (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
+ (begin
+ (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
+ #f))
+ (begin
+ ;; (rmt:update-db-stats run-id cmd params duration)
+ ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
+ (if qry-is-write
+ (let ((start-time (current-seconds)))
+ (mutex-lock! *db-multi-sync-mutex*)
+/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
+ (mutex-unlock! *db-multi-sync-mutex*)))))
+ res))
+
+(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
+ (let* ((run-id (if run-id run-id 0))
+ (res (http-transport:client-api-send-receive run-id runremote cmd params)))
+ (if (and res (vector-ref res 0))
+ (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
+ #f)))
+
+;;======================================================================
+;;
+;; A C T U A L A P I C A L L S
+;;
+;;======================================================================
+
+;;======================================================================
+;; S E R V E R
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+ (rmt:send-receive 'kill-server run-id (list run-id)))
+
+(define (rmt:start-server run-id)
+ (rmt:send-receive 'start-server 0 (list run-id)))
+
+;;======================================================================
+;; M I S C
+;;======================================================================
+
+(define (rmt:login run-id)
+ (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))
+
+;; This login does no retries under the hood - it acts a bit like a ping.
+;; Deprecated for nmsg-transport.
+;;
+(define (rmt:login-no-auto-client-setup runremote)
+ (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
+
+;; hand off a call to one of the db:queries statements
+;; added run-id to make looking up the correct db possible
+;;
+(define (rmt:general-call stmtname run-id . params)
+ (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
+
+
+;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
+(define (rmt:get-latest-host-load hostname)
+ (rmt:send-receive 'get-latest-host-load 0 (list hostname)))
+
+(define (rmt:sdb-qry qry val run-id)
+ ;; add caching if qry is 'getid or 'getstr
+ (rmt:send-receive 'sdb-qry run-id (list qry val)))
+
+;; NOT COMPLETED
+(define (rmt:runtests user run-id testpatt params)
+ (rmt:send-receive 'runtests run-id testpatt))
+
+(define (rmt:get-run-record-ids target run keynames test-patt)
+ (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))
+
+(define (rmt:get-changed-record-ids since-time)
+ (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )
+
+(define (rmt:drop-all-triggers)
+ (rmt:send-receive 'drop-all-triggers #f '()))
+
+(define (rmt:create-all-triggers)
+ (rmt:send-receive 'create-all-triggers #f '()))
+
+;;======================================================================
+;; T E S T M E T A
+;;======================================================================
+
+(define (rmt:get-tests-tags)
+ (rmt:send-receive 'get-tests-tags #f '()))
+
+;;======================================================================
+;; K E Y S
+;;======================================================================
+
+;; These require run-id because the values come from the run!
+;;
+(define (rmt:get-key-val-pairs run-id)
+ (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
+
+(define (rmt:get-keys)
+ (if *db-keys* *db-keys*
+ (let ((res (rmt:send-receive 'get-keys #f '())))
+ (set! *db-keys* res)
+ res)))
+
+(define (rmt:get-keys-write) ;; dummy query to force server start
+ (let ((res (rmt:send-receive 'get-keys-write #f '())))
+ (set! *db-keys* res)
+ res))
+
+;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
+;; to cache the resuls in a hash
+;;
+(define (rmt:get-key-vals run-id)
+ (or (hash-table-ref/default *keyvals* run-id #f)
+ (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
+ (hash-table-set! *keyvals* run-id res)
+ res)))
+
+(define (rmt:get-targets)
+ (rmt:send-receive 'get-targets #f '()))
+
+(define (rmt:get-target run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-target run-id (list run-id)))
+
+(define (rmt:get-run-times runpatt targetpatt)
+ (rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
+
+
+;;======================================================================
+;; T E S T S
+;;======================================================================
+
+;; Just some syntatic sugar
+(define (rmt:register-test run-id test-name item-path)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:general-call 'register-test run-id run-id test-name item-path))
+
+(define (rmt:get-test-id run-id testname item-path)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
+
+;; run-id is NOT used
+;;
+(define (rmt:get-test-info-by-id run-id test-id)
+ (if (number? test-id)
+ (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
+ (begin
+ (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
+ (print-call-chain (current-error-port))
+ #f)))
+
+(define (rmt:test-get-rundir-from-test-id run-id test-id)
+ (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
+
+(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
+ (assert (number? run-id) "FATAL: Run id required.")
+ (let* ((test-path (if (string? work-area)
+ work-area
+ (rmt:test-get-rundir-from-test-id run-id test-id))))
+ (debug:print 3 *default-log-port* "TEST PATH: " test-path)
+ (open-test-db test-path)))
+
+;; WARNING: This currently bypasses the transaction wrapped writes system
+(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
+
+(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
+
+(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
+ (assert (number? run-id) "FATAL: Run id required.")
+ ;; (if (number? run-id)
+ (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
+ ;; (begin
+ ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
+ ;; (print-call-chain (current-error-port))
+ ;; '())))
+
+(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
+
+;; get stuff via synchash
+(define (rmt:synchash-get run-id proc synckey keynum params)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
+
+(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
+
+;; IDEA: Threadify these - they spend a lot of time waiting ...
+;;
+(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
+ (let ((multi-run-mutex (make-mutex))
+ (run-id-list (if run-ids
+ run-ids
+ (rmt:get-all-run-ids)))
+ (result '()))
+ (if (null? run-id-list)
+ '()
+ (let loop ((hed (car run-id-list))
+ (tal (cdr run-id-list))
+ (threads '()))
+ (if (> (length threads) 5)
+ (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
+ (let* ((newthread (make-thread
+ (lambda ()
+ (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
+ (if (list? res)
+ (begin
+ (mutex-lock! multi-run-mutex)
+ (set! result (append result res))
+ (mutex-unlock! multi-run-mutex))
+ (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
+ (conc "multi-run-thread for run-id " hed)))
+ (newthreads (cons newthread threads)))
+ (thread-start! newthread)
+ (thread-sleep! 0.05) ;; give that thread some time to start
+ (if (null? tal)
+ newthreads
+ (loop (car tal)(cdr tal) newthreads))))))
+ result))
+
+;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
+;; ;;
+;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
+;; (let ((run-id-list (if run-ids
+;; run-ids
+;; (rmt:get-all-run-ids))))
+;; (apply append (map (lambda (run-id)
+;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
+;; run-id-list))))
+
+(define (rmt:delete-test-records run-id test-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
+
+(define (rmt:test-set-state-status run-id test-id state status msg)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
+
+(define (rmt:test-toplevel-num-items run-id test-name)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
+
+;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
+;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
+
+(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
+
+(define (rmt:test-get-logfile-info run-id test-name)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
+
+(define (rmt:test-get-records-for-index-file run-id test-name)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
+
+(define (rmt:get-testinfo-state-status run-id test-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
+
+(define (rmt:test-set-log! run-id test-id logf)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
+
+(define (rmt:test-set-top-process-pid run-id test-id pid)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
+
+(define (rmt:test-get-top-process-pid run-id test-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
+
+(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
+ (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
+
+;; NOTE: This will open and access ALL run databases.
+;;
+(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
+ (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
+ (apply append
+ (map (lambda (run-id)
+ (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
+ run-ids))))
+
+(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
+
+(define (rmt:get-count-tests-running-for-run-id run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
+
+(define (rmt:get-not-completed-cnt run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
+
+
+;; Statistical queries
+
+(define (rmt:get-count-tests-running run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
+
+(define (rmt:get-count-tests-running-for-testname run-id testname)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
+
+(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
+
+;; state and status are extra hints not usually used in the calculation
+;;
+(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
+
+(define (rmt:set-state-status-and-roll-up-run run-id state status)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
+
+
+(define (rmt:update-pass-fail-counts run-id test-name)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
+
+(define (rmt:top-test-set-per-pf-counts run-id test-name)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
+
+(define (rmt:get-raw-run-stats run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
+
+(define (rmt:get-test-times runname target)
+ (rmt:send-receive 'get-test-times #f (list runname target )))
+
+;;======================================================================
+;; R U N S
+;;======================================================================
+
+;; BUG - LOOK AT HOW THIS WORKS!!!
+;;
+(define (rmt:get-run-info run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-run-info #f (list run-id)))
+
+(define (rmt:get-num-runs runpatt)
+ (rmt:send-receive 'get-num-runs #f (list runpatt)))
+
+(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
+ (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys)))
+
+;; Use the special run-id == #f scenario here since there is no run yet
+(define (rmt:register-run keyvals runname state status user contour)
+ (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
+
+(define (rmt:get-run-name-from-id run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-run-name-from-id #f (list run-id)))
+
+(define (rmt:delete-run run-id)
+ (rmt:send-receive 'delete-run #f (list run-id)))
+
+(define (rmt:update-run-stats run-id stats)
+ (rmt:send-receive 'update-run-stats #f (list run-id stats)))
+
+(define (rmt:delete-old-deleted-test-records)
+ (rmt:send-receive 'delete-old-deleted-test-records #f '()))
+
+(define (rmt:get-runs runpatt count offset keypatts)
+ (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
+
+(define (rmt:simple-get-runs runpatt count offset target last-update)
+ (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target last-update)))
+
+(define (rmt:get-all-run-ids)
+ (rmt:send-receive 'get-all-run-ids #f '()))
+
+(define (rmt:get-prev-run-ids run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
+
+(define (rmt:lock/unlock-run run-id lock unlock user)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
+
+;; set/get status
+(define (rmt:get-run-status run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-run-status #f (list run-id)))
+
+(define (rmt:get-run-state run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-run-state #f (list run-id)))
+
+
+(define (rmt:set-run-status run-id run-status #!key (msg #f))
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
+
+(define (rmt:set-run-state-status run-id state status )
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'set-run-state-status #f (list run-id state status)))
+
+(define (rmt:update-tesdata-on-repilcate-db old-lt new-lt)
+(rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt)))
+
+(define (rmt:update-run-event_time run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'update-run-event_time #f (list run-id)))
+
+(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default
+ (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
+
+(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
+ (assert (number? run-id) "FATAL: Run id required.")
+ ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
+ (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
+
+(define (rmt:get-main-run-stats run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-main-run-stats #f (list run-id)))
+
+(define (rmt:get-var varname)
+ (rmt:send-receive 'get-var #f (list varname)))
+
+(define (rmt:del-var varname)
+ (rmt:send-receive 'del-var #f (list varname)))
+
+(define (rmt:set-var varname value)
+ (rmt:send-receive 'set-var #f (list varname value)))
+
+(define (rmt:inc-var varname)
+ (rmt:send-receive 'inc-var #f (list varname)))
+
+(define (rmt:dec-var varname)
+ (rmt:send-receive 'dec-var #f (list varname)))
+
+(define (rmt:add-var varname value)
+ (rmt:send-receive 'add-var #f (list varname value)))
+
+;;======================================================================
+;; M U L T I R U N Q U E R I E S
+;;======================================================================
+
+;; Need to move this to multi-run section and make associated changes
+(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
+ (let ((run-ids (rmt:get-all-run-ids)))
+ (for-each (lambda (run-id)
+ (rmt:find-and-mark-incomplete run-id ovr-deadtime))
+ run-ids)))
+
+;; get the previous record for when this test was run where all keys match but runname
+;; returns #f if no such test found, returns a single test record if found
+;;
+;; Run this at the client end since we have to connect to multiple run-id dbs
+;;
+(define (rmt:get-previous-test-run-record run-id test-name item-path)
+ (let* ((keyvals (rmt:get-key-val-pairs run-id))
+ (keys (rmt:get-keys))
+ (selstr (string-intersperse keys ","))
+ (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
+ (if (not keyvals)
+ #f
+ (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
+ ;; for each run starting with the most recent look to see if there is a matching test
+ ;; if found then return that matching test record
+ (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
+ (if (null? prev-run-ids) #f
+ (let loop ((hed (car prev-run-ids))
+ (tal (cdr prev-run-ids)))
+ (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
+ #f #f #f ;; offset limit not-in hide/not-hide
+ #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
+ (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
+ (if (and (null? results)
+ (not (null? tal)))
+ (loop (car tal)(cdr tal))
+ (if (null? results) #f
+ (car results))))))))))
+
+(define (rmt:get-run-stats)
+ (rmt:send-receive 'get-run-stats #f '()))
+
+;;======================================================================
+;; S T E P S
+;;======================================================================
+
+;; Getting steps is more complicated.
+;;
+;; If given work area
+;; 1. Find the testdat.db file
+;; 2. Open the testdat.db file and do the query
+;; If not given the work area
+;; 1. Do a remote call to get the test path
+;; 2. Continue as above
+;;
+;;(define (rmt:get-steps-for-test run-id test-id)
+;; (rmt:send-receive 'get-steps-data run-id (list test-id)))
+
+(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (let* ((state (items:check-valid-items "state" state-in))
+ (status (items:check-valid-items "status" status-in)))
+ (if (or (not state)(not status))
+ (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
+ " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
+ (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
+
+
+(define (rmt:delete-steps-for-test! run-id test-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))
+
+(define (rmt:get-steps-for-test run-id test-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
+
+(define (rmt:get-steps-info-by-id run-id test-step-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id)))
+
+;;======================================================================
+;; T E S T D A T A
+;;======================================================================
+
+(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f))
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
+
+(define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f))
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt)))
+
+(define (rmt:get-data-info-by-id run-id test-data-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id)))
+
+(define (rmt:testmeta-add-record testname)
+ (rmt:send-receive 'testmeta-add-record #f (list testname)))
+
+(define (rmt:testmeta-get-record testname)
+ (rmt:send-receive 'testmeta-get-record #f (list testname)))
+
+(define (rmt:testmeta-update-field test-name fld val)
+ (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
+
+(define (rmt:test-data-rollup run-id test-id status)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
+
+(define (rmt:csv->test-data run-id test-id csvdata)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
+
+;;======================================================================
+;; T A S K S
+;;======================================================================
+
+(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
+ (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))
+
+(define (rmt:tasks-add action owner target runname testpatt params)
+ (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
+
+(define (rmt:tasks-set-state-given-param-key param-key new-state)
+ (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
+
+(define (rmt:tasks-get-last target runname)
+ (rmt:send-receive 'tasks-get-last #f (list target runname)))
+
+;;======================================================================
+;; N O S Y N C D B
+;;======================================================================
+
+(define (rmt:no-sync-set var val)
+ (rmt:send-receive 'no-sync-set #f `(,var ,val)))
+
+(define (rmt:no-sync-get/default var default)
+ (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
+
+(define (rmt:no-sync-del! var)
+ (rmt:send-receive 'no-sync-del! #f `(,var)))
+
+(define (rmt:no-sync-get-lock keyname)
+ (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
+
+;;======================================================================
+;; A R C H I V E S
+;;======================================================================
+
+(define (rmt:archive-get-allocations testname itempath dneeded)
+ (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
+
+(define (rmt:archive-register-block-name bdisk-id archive-path)
+ (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))
+
+(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
+ (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey)))
+
+(define (rmt:archive-register-disk bdisk-name bdisk-path df)
+ (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
+
+(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
+
+(define (rmt:test-get-archive-block-info archive-block-id)
+ (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
+
+
+(define (rmtmod:calc-ro-mode runremote *toppath*)
+ (if (and runremote
+ (remote-ro-mode-checked runremote))
+ (remote-ro-mode runremote)
+ (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
+ (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+ (if runremote
+ (begin
+ (remote-ro-mode-set! runremote ro-mode)
+ (remote-ro-mode-checked-set! runremote #t)
+ ro-mode)
+ ro-mode))))
+
+(define (extras-readonly-mode rmt-mutex log-port cmd params)
+ (mutex-unlock! rmt-mutex)
+ (debug:print-info 12 log-port "rmt:send-receive, case 3")
+ (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
+ #f)
+
+(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
+ (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
+ (mutex-lock! *rmt-mutex*)
+ (http-transport:close-connections runremote)
+ (remote-server-url-set! runremote #f)
+ (mutex-unlock! *rmt-mutex*)
+ (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
+ (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
+
+(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
+ (if (and (vector? res)
+ (eq? (vector-length res) 2)
+ (eq? (vector-ref res 1) 'overloaded)) ;; since we are
+ ;; looking at the
+ ;; data to carry the
+ ;; error we'll use a
+ ;; fairly obtuse
+ ;; combo to minimise
+ ;; the chances of
+ ;; some sort of
+ ;; collision. this
+ ;; is the case where
+ ;; the returned data
+ ;; is bad or the
+ ;; server is
+ ;; overloaded and we
+ ;; want to ease off
+ ;; the queries
+ (let ((wait-delay (+ attemptnum (* attemptnum 10))))
+ (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
+ (mutex-lock! *rmt-mutex*)
+ (http-transport:close-connections runremote)
+ (set! *runremote* #f) ;; force starting over
+ (mutex-unlock! *rmt-mutex*)
+ (thread-sleep! wait-delay)
+ (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
+ res)) ;; All good, return res
+
+#;(set-functions rmt:send-receive remote-server-url-set!
+ http-transport:close-connections remote-conndat-set!
+ debug:print debug:print-info
+ remote-ro-mode remote-ro-mode-set!
+ remote-ro-mode-checked-set! remote-ro-mode-checked)
+
+
)
Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -25,11 +25,11 @@
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
-(declare (uses server))
+(declare (uses servermod))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))
(include "common_records.scm")
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -14,857 +14,858 @@
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
-(require-extension (srfi 18) extras tcp s11n)
-
-(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
- directory-utils posix-extras matchable utils)
-
-(use spiffy uri-common intarweb http-client spiffy-request-vars)
-
-(declare (unit server))
-
-(declare (uses commonmod))
-
-(declare (uses common))
-(declare (uses db))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-;; (declare (uses synchash))
-(declare (uses http-transport))
-;;(declare (uses rpc-transport))
-(declare (uses launch))
-;; (declare (uses daemon))
-
-(import commonmod)
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-(define (server:make-server-url hostport)
- (if (not hostport)
- #f
- (conc "http://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-;; Call this to start the actual server
-;;
-
-;;======================================================================
-;; S E R V E R U T I L I T I E S
-;;======================================================================
-
-;; Get the transport
-(define (server:get-transport)
- (if *transport-type*
- *transport-type*
- (let ((ttype (string->symbol
- (or (args:get-arg "-transport")
- (configf:lookup *configdat* "server" "transport")
- "rpc"))))
- (set! *transport-type* ttype)
- ttype)))
-
-;; Generate a unique signature for this server
-(define (server:mk-signature)
- (message-digest-string (md5-primitive)
- (with-output-to-string
- (lambda ()
- (write (list (current-directory)
- (current-process-id)
- (argv)))))))
-
-(define (server:get-client-signature)
- (if *my-client-signature* *my-client-signature*
- (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
-(define (server:get-server-id)
- (if *server-id* *server-id*
- (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
- (set! *server-id* sig)
- *server-id*)))
-
-;; When using zmq this would send the message back (two step process)
-;; with spiffy or rpc this simply returns the return data to be returned
-;;
-(define (server:reply return-addr query-sig success/fail result)
- (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
- ;; (send-message pubsock target send-more: #t)
- ;; (send-message pubsock
- (case (server:get-transport)
- ((rpc) (db:obj->string (vector success/fail query-sig result)))
- ((http) (db:obj->string (vector success/fail query-sig result)))
- ((fs) result)
- (else
- (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
- result)))
-
-;; Given an area path, start a server process ### NOTE ### > file 2>&1
-;; if the target-host is set
-;; try running on that host
-;; incidental: rotate logs in logs/ dir.
-;;
-(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
- (let* ((testsuite (common:get-testsuite-name))
- (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
- (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
- ""))
- (cmdln (conc (common:get-megatest-exe)
- " -server - ";; (or target-host "-")
- (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
- " -daemonize "
- "")
- ;; " -log " logfile
- " -m testsuite:" testsuite
- " " profile-mode
- )) ;; (conc " >> " logfile " 2>&1 &")))))
- (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
- (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
- ;; we want the remote server to start in *toppath* so push there
- (push-directory areapath)
- (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
- (thread-start! log-rotate)
-
- ;; host.domain.tld match host?
- ;; (if (and target-host
- ;; ;; look at target host, is it host.domain.tld or ip address and does it
- ;; ;; match current ip or hostname
- ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
- ;; (not (equal? curr-ip target-host)))
- ;; (begin
- ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
- ;; (setenv "TARGETHOST" target-host)))
- ;;
- (setenv "TARGETHOST_LOGF" logfile)
- (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
- (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
- (system (conc "nbfake " cmdln))
- (unsetenv "TARGETHOST_LOGF")
- ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
- (thread-join! log-rotate)
- (pop-directory)))
-
-;; given a path to a server log return: host port startseconds server-id
-;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
-;; example of what it's looking for in the log file:
-;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
-
-(define (server:logf-get-start-info logf)
- (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
- (dbprep-rx (regexp "^SERVER: dbprep"))
- (dbprep-found 0)
- (bad-dat (list #f #f #f #f #f)))
- (handle-exceptions
- exn
- (begin
- ;; WARNING: this is potentially dangerous to blanket ignore the errors
- (if (file-exists? logf)
- (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
- bad-dat) ;; no idea what went wrong, call it a bad server
- (with-input-from-file
- logf
- (lambda ()
- (let loop ((inl (read-line))
- (lnum 0))
- (if (not (eof-object? inl))
- (let ((mlst (string-match server-rx inl))
- (dbprep (string-match dbprep-rx inl)))
- (if dbprep (set! dbprep-found 1))
- (if (not mlst)
- (if (< lnum 500) ;; give up if more than 500 lines of server log read
- (loop (read-line)(+ lnum 1))
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
- bad-dat))
- (match mlst
- ((_ host port start server-id pid)
- (list host
- (string->number port)
- (string->number start)
- server-id
- (string->number pid)))
- (else
- (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
- bad-dat))))
- (begin
- (if dbprep-found
- (begin
- (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
- (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
- (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
- bad-dat))))))))
-
-;; ;; get a list of servers from the log files, with all relevant data
-;; ;; ( mod-time host port start-time pid )
-;; ;;
-;; (define (server:get-list areapath #!key (limit #f))
-;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
-;; (day-seconds (* 24 60 60)))
-;; ;; if the directory exists continue to get the list
-;; ;; otherwise attempt to create the logs dir and then
-;; ;; continue
-;; (if (if (directory-exists? (conc areapath "/logs"))
-;; '()
-;; (if (file-write-access? areapath)
-;; (begin
-;; (condition-case
-;; (create-directory (conc areapath "/logs") #t)
-;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
-;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
-;; (directory-exists? (conc areapath "/logs")))
-;; '()))
-;;
-;; ;; Get the list of server logs.
-;; (let* (
-;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
-;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
-;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
-;; (num-serv-logs (length server-logs)))
-;; (if (or (null? server-logs) (= num-serv-logs 0))
-;; (let ()
-;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
-;; '()
-;; )
-;; (let loop ((hed (string-chomp (car server-logs)))
-;; (tal (cdr server-logs))
-;; (res '()))
-;; (let* ((mod-time (handle-exceptions
-;; exn
-;; (begin
-;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
-;; (current-seconds)) ;; 0
-;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
-;; (down-time (- (current-seconds) mod-time))
-;; (serv-dat (if (or (< num-serv-logs 10)
-;; (< down-time 900)) ;; day-seconds))
-;; (server:logf-get-start-info hed)
-;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
-;; (serv-rec (cons mod-time serv-dat))
-;; (fmatch (string-match fname-rx hed))
-;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
-;; (new-res (if (null? serv-dat)
-;; res
-;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
-;; (if (null? tal)
-;; (if (and limit
-;; (> (length new-res) limit))
-;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
-;; new-res)
-;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
-
-#;(define (server:get-num-alive srvlst)
- (let ((num-alive 0))
- (for-each
- (lambda (server)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
- (match-let (((mod-time host port start-time server-id pid)
- server))
- (let* ((uptime (- (current-seconds) mod-time))
- (runtime (if start-time
- (- mod-time start-time)
- 0)))
- (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
- srvlst)
- num-alive))
-
-;; ;; given a list of servers get a list of valid servers, i.e. at least
-;; ;; 10 seconds old, has started and is less than 1 hour old and is
-;; ;; active (i.e. mod-time < 10 seconds
-;; ;;
-;; ;; mod-time host port start-time pid
-;; ;;
-;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
-;; ;; and servers should stick around for about two hours or so.
-;; ;;
-;; (define (server:get-best srvlst)
-;; (let* ((nums (server:get-num-servers))
-;; (now (current-seconds))
-;; (slst (sort
-;; (filter (lambda (rec)
-;; (if (and (list? rec)
-;; (> (length rec) 2))
-;; (let ((start-time (list-ref rec 3))
-;; (mod-time (list-ref rec 0)))
-;; ;; (print "start-time: " start-time " mod-time: " mod-time)
-;; (and start-time mod-time
-;; (> (- now start-time) 0) ;; been running at least 0 seconds
-;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
-;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
-;; (< (- now start-time)
-;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
-;; 180)
-;; (random 360)))) ;; under one hour running time +/- 180
-;; ))
-;; #f))
-;; srvlst)
-;; (lambda (a b)
-;; (< (list-ref a 3)
-;; (list-ref b 3))))))
-;; (if (> (length slst) nums)
-;; (take slst nums)
-;; slst)))
-
-;; ;; switch from server:get-list to server:get-servers-info
-;; ;;
-;; (define (server:get-first-best areapath)
-;; (let ((srvrs (server:get-best (server:get-list areapath))))
-;; (if (and srvrs
-;; (not (null? srvrs)))
-;; (car srvrs)
-;; #f)))
-;;
-;; (define (server:get-rand-best areapath)
-;; (let ((srvrs (server:get-best (server:get-list areapath))))
-;; (if (and (list? srvrs)
-;; (not (null? srvrs)))
-;; (let* ((len (length srvrs))
-;; (idx (random len)))
-;; (list-ref srvrs idx))
-;; #f)))
-
-(define (server:record->id servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
- #f)
- (match-let (((host port start-time server-id pid)
- servr))
- (if server-id
- server-id
- #f))))
-
-(define (server:record->url servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
- #f)
- (match-let (((host port start-time server-id pid)
- servr))
- (if (and host port)
- (conc host ":" port)
- #f))))
-
-
-;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
-;; if it is old enough, overwrite it and wait 0.25 seconds.
-;; if it then has the wrong server key, wait + 1 and call this function recursively.
-;;
-#;(define (server:wait-for-server-start-last-flag areapath)
- (let* ((start-flag (conc areapath "/logs/server-start-last"))
- ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
- (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
- (server-key (conc (get-host-name) "-" (current-process-id))))
- (if (file-exists? start-flag)
- (let* ((fmodtime (file-modification-time start-flag))
- (delta (- (current-seconds) fmodtime))
- (old-enough (> delta idletime))
- (new-server-key ""))
- ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t.
- ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
- (if (and old-enough
- (begin
- (debug:print-info 2 *default-log-port* "Writing " start-flag)
- (with-output-to-file start-flag (lambda () (print server-key)))
- (thread-sleep! 0.25)
- (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
- (equal? server-key new-server-key)))
- #t
- ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively.
- (begin
- (debug:print-info 0 *default-log-port* "Gating server start, last start: "
- (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
-
- (thread-sleep! ( + 1 idletime))
- (server:wait-for-server-start-last-flag areapath)))))))
-
-;; oldest server alive determines host then choose random of youngest
-;; five servers on that host
-;;
-(define (server:get-servers-info areapath)
- ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
- (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
- (if (not (file-exists? servinfodir))
- (create-directory servinfodir))
- (let* ((allfiles (glob (conc servinfodir"/*")))
- (res (make-hash-table)))
- (for-each
- (lambda (f)
- (let* ((hostport (pathname-strip-directory f))
- (serverdat (server:logf-get-start-info f)))
- (match serverdat
- ((host port start server-id pid)
- (if (and host port start server-id pid)
- (hash-table-set! res hostport serverdat)
- (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
- (else
- (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
- allfiles)
- res)))
-
-;; check the .servinfo directory, are there other servers running on this
-;; or another host?
-;;
-;; returns #t => ok to start another server
-;; #f => not ok to start another server
-;;
-(define (server:minimal-check areapath)
- (server:clean-up-old areapath)
- (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
- (servrs (glob (conc srvdir"/*")))
- (thishostip (server:get-best-guess-address (get-host-name)))
- (thisservrs (glob (conc srvdir"/"thishostip":*")))
- (homehostinf (server:choose-server areapath 'homehost))
- (havehome (car homehostinf))
- (wearehome (cdr homehostinf)))
- (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
- ", numservers: "(length thisservrs))
- (cond
- ((not havehome) #t) ;; no homehost yet, go for it
- ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
- ((and havehome (not wearehome)) #f) ;; we are not the home host
- ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
- (else
- (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
- #t))))
-
-
-(define server-last-start 0)
-
-
-;; oldest server alive determines host then choose random of youngest
-;; five servers on that host
-;;
-;; mode:
-;; best - get best server (random of newest five)
-;; home - get home host based on oldest server
-;; info - print info
-(define (server:choose-server areapath #!optional (mode 'best))
- ;; age is current-starttime
- ;; find oldest alive
- ;; 1. sort by age ascending and ping until good
- ;; find alive rand from youngest
- ;; 1. sort by age descending
- ;; 2. take five
- ;; 3. check alive, discard if not and repeat
- ;; first we clean up old server files
- (server:clean-up-old areapath)
- (let* ((since-last (- (current-seconds) server-last-start))
- (server-start-delay 10))
- (if ( < (- (current-seconds) server-last-start) 10 )
- (begin
- (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
- (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
- (thread-sleep! server-start-delay)
- )
- (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
- )
- )
- (let* ((serversdat (server:get-servers-info areapath))
- (servkeys (hash-table-keys serversdat))
- (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
- (sort servkeys ;; list of "host:port"
- (lambda (a b)
- (>= (list-ref (hash-table-ref serversdat a) 2)
- (list-ref (hash-table-ref serversdat b) 2))))
- '())))
- (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
- (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
- (if (not (null? by-time-asc))
- (let* ((oldest (last by-time-asc))
- (oldest-dat (hash-table-ref serversdat oldest))
- (host (list-ref oldest-dat 0))
- (all-valid (filter (lambda (x)
- (equal? host (list-ref (hash-table-ref serversdat x) 0)))
- by-time-asc))
- (best-ten (lambda ()
- (if (> (length all-valid) 11)
- (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
- (if (> (length all-valid) 8)
- (drop-right all-valid 1)
- all-valid))))
- (names->dats (lambda (names)
- (map (lambda (x)
- (hash-table-ref serversdat x))
- names)))
- (am-home? (lambda ()
- (let* ((currhost (get-host-name))
- (bestadrs (server:get-best-guess-address currhost)))
- (or (equal? host currhost)
- (equal? host bestadrs))))))
- (case mode
- ((info)
- (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
- (print "youngest: "(hash-table-ref serversdat (car all-valid))))
- ((home) host)
- ((homehost) (cons host (am-home?))) ;; shut up old code
- ((home?) (am-home?))
- ((best-ten)(names->dats (best-ten)))
- ((all-valid)(names->dats all-valid))
- ((best) (let* ((best-ten (best-ten))
- (len (length best-ten)))
- (hash-table-ref serversdat (list-ref best-ten (random len)))))
- ((count)(length all-valid))
- (else
- (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
- #f)))
- (begin
- (server:run areapath)
- (set! server-last-start (current-seconds))
- ;; (thread-sleep! 3)
- (case mode
- ((homehost) (cons #f #f))
- (else #f))))))
-
-(define (server:get-servinfo-dir areapath)
- (let* ((spath (conc areapath"/.servinfo")))
- (if (not (file-exists? spath))
- (create-directory spath #t))
- spath))
-
-(define (server:clean-up-old areapath)
- ;; any server file that has not been touched in ten minutes is effectively dead
- (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
- (for-each
- (lambda (sfile)
- (let* ((modtime (handle-exceptions
- exn
- (begin
- (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
- (current-seconds))
- (file-modification-time sfile))))
- (if (and (number? modtime)
- (> (- (current-seconds) modtime)
- 600))
- (begin
- (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
- (delete-file sfile))))))
- sfiles)))
-
-;; would like to eventually get rid of this
-;;
-(define (common:on-homehost?)
- (server:choose-server *toppath* 'home?))
-
-;; kind start up of server, wait before allowing another server for a given
-;; area to be launched
-;;
-(define (server:kind-run areapath)
- ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
- ;; and wait for it to be at least seconds old
- ;; (server:wait-for-server-start-last-flag areapath)
- (let loop ()
- (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
- (begin
- (if (common:low-noise-print 30 "our-host-load")
- (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
- (loop))))
- (if (< (server:choose-server areapath 'count) 20)
- (server:run areapath))
- #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
- (let* ((lock-file (conc areapath "/logs/server-start.lock")))
- (let* ((start-flag (conc areapath "/logs/server-start-last")))
- (common:simple-file-lock-and-wait lock-file expire-time: 25)
- (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
- (system (conc "touch " start-flag)) ;; lazy but safe
- (server:run areapath)
- (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
- (common:simple-file-release-lock lock-file)))
- (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
-
-;; this one seems to be the general entry point
-;;
-(define (server:start-and-wait areapath #!key (timeout 60))
- (let ((give-up-time (+ (current-seconds) timeout)))
- (let loop ((server-info (server:check-if-running areapath))
- (try-num 0))
- (if (or server-info
- (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
- (server:record->url server-info)
- (let* ( (servers (server:choose-server areapath 'all-valid))
- (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
- (if (and (> try-num 0) ;; first time through simply wait a little while then try again
- (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
- (server:run areapath))
- (thread-sleep! 5)
- (loop (server:check-if-running areapath)
- (+ try-num 1)))))))
-
-(define (server:get-num-servers #!key (numservers 2))
- (let ((ns (string->number
- (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
- (or ns numservers)))
-
-;; no longer care if multiple servers are started by accident. older servers will drop off in time.
-;;
-(define (server:check-if-running areapath) ;; #!key (numservers "2"))
- (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
- (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
- (if (or (and servers
- (null? servers))
- (not servers))
- ;; (and (list? servers)
- ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
- #f
- (let loop ((hed (car servers))
- (tal (cdr servers)))
- (let ((res (server:check-server hed)))
- (if res
- hed
- (if (null? tal)
- #f
- (loop (car tal)(cdr tal)))))))))
-
-;; ping the given server
-;;
-(define (server:check-server server-record)
- (let* ((server-url (server:record->url server-record))
- (server-id (server:record->id server-record))
- (res (server:ping server-url server-id)))
- (if res
- server-url
- #f)))
-
-(define (server:kill servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
- #f)
- (match-let (((mod-time hostname port start-time server-id pid)
- servr))
- (tasks:kill-server hostname pid))))
-
-;; called in megatest.scm, host-port is string hostname:port
-;;
-;; NOTE: This is NOT called directly from clients as not all transports support a client running
-;; in the same process as the server.
-;;
-(define (server:ping host:port server-id #!key (do-exit #f))
- (let* ((host-port (cond
- ((string? host:port)
- (let ((slst (string-split host:port ":")))
- (if (eq? (length slst) 2)
- (list (car slst)(string->number (cadr slst)))
- #f)))
- (else
- #f))))
- (cond
- ((and (list? host-port)
- (eq? (length host-port) 2))
- (let* ((myrunremote (make-remote))
- (iface (car host-port))
- (port (cadr host-port))
- (server-dat (client:connect iface port server-id myrunremote))
- (login-res (rmt:login-no-auto-client-setup myrunremote)))
- (if (and (list? login-res)
- (car login-res))
- (begin
- ;; (print "LOGIN_OK")
- (if do-exit (exit 0))
- #t)
- (begin
- ;; (print "LOGIN_FAILED")
- (if do-exit (exit 1))
- #f))))
- (else
- (if host:port
- (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
- (if do-exit
- (exit 1)
- #f)))))
-
-;; run ping in separate process, safest way in some cases
-;;
-(define (server:ping-server ifaceport)
- (with-input-from-pipe
- (conc (common:get-megatest-exe) " -ping " ifaceport)
- (lambda ()
- (let loop ((inl (read-line))
- (res "NOREPLY"))
- (if (eof-object? inl)
- (case (string->symbol res)
- ((NOREPLY) #f)
- ((LOGIN_OK) #t)
- (else #f))
- (loop (read-line) inl))))))
-
-;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
-;;
-(define (server:login toppath)
- (lambda (toppath)
- (set! *db-last-access* (current-seconds)) ;; might not be needed.
- (if (equal? *toppath* toppath)
- #t
- #f)))
-
-;; timeout is hms string: 1h 5m 3s, default is 1 minute
-;; This is currently broken. Just use the number of hours with no unit.
-;; Default is 60 seconds.
-;;
-(define (server:expiration-timeout)
- (let ((tmo (configf:lookup *configdat* "server" "timeout")))
- (if (and (string? tmo)
- (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
- (* 3600 (string->number tmo))
- 600)))
-
-(define (server:get-best-guess-address hostname)
- (let ((res #f))
- (for-each
- (lambda (adr)
- (if (not (eq? (u8vector-ref adr 0) 127))
- (set! res adr)))
- ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
- (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
- (string-intersperse
- (map number->string
- (u8vector->list
- (if res res (hostname->ip hostname)))) ".")))
-
-;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
-;; (define (server:release-sync-lock)
-;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
-;; (define (server:have-sync-lock?)
-;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
-;; (have-lock? (car have-lock-pair))
-;; (lock-time (cdr have-lock-pair))
-;; (lock-age (- (current-seconds) lock-time)))
-;; (cond
-;; (have-lock? #t)
-;; ((>lock-age
-;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
-;; (server:release-sync-lock)
-;; (server:have-sync-lock?))
-;; (else #f))))
-
-;; moving this here as it needs access to db and cannot be in common.
-;;
-
-(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
- (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
- (lambda ()
- (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))
- #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
- (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
- (tmp-area (common:get-db-tmp-area))
- (tmp-db (conc tmp-area "/megatest.db"))
- (staging-file (conc *toppath* "/.megatest.db"))
- (mtdbfile (conc *toppath* "/megatest.db"))
- (lockfile (common:get-sync-lock-filepath))
- (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
- (sync-cmd (if fork-to-background
- (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
- sync-cmd-core))
- (default-min-intersync-delay 2)
- (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
- (default-duty-cycle 0.1)
- (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
- (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
- (calculate-off-time (lambda (work-duration duty-cycle)
- (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
- (off-time min-intersync-delay) ;; adjusted in closure below.
- (do-a-sync
- (lambda ()
- (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
- (let* ((finalres
- (let retry-loop ((num-tries 0))
- (if (common:simple-file-lock lockfile)
- (begin
- (cond
- ((not (or fork-to-background persist-until-sync))
- (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
- " , off-time="off-time" seconds ]")
- (thread-sleep! (max off-time min-intersync-delay)))
- (else
- (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
-
- (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
- (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
- (delete-file* staging-file)
- (let* ((start-time (current-milliseconds))
- (res (system sync-cmd))
- (dbbackupfile (conc mtdbfile ".backup"))
- (res2
- (cond
- ((eq? 0 res )
- (handle-exceptions
- exn
- #f
- (if (file-exists? dbbackupfile)
- (delete-file* dbbackupfile)
- )
- (if (eq? 0 (file-size sync-log))
- (delete-file* sync-log))
- (system (conc "/bin/mv " staging-file " " mtdbfile))
-
- (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
- (set! off-time (calculate-off-time
- last-sync-seconds
- (cond
- ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
- duty-cycle)
- (else
- (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
- default-duty-cycle))))
-
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
- 'sync-completed))
- (else
- (system (conc "/bin/cp "sync-log" "sync-log".fail"))
- (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
- (if (file-exists? (conc mtdbfile ".backup"))
- (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
- #f))))
- (common:simple-file-release-lock lockfile)
- (BB> "released lockfile: " lockfile)
- (when (common:file-exists? lockfile)
- (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
- res2) ;; end let
- );; end begin
- ;; else
- (cond
- (persist-until-sync
- (thread-sleep! 1)
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
- (retry-loop (add1 num-tries)))
- (else
- (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
- (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
- 'parallel-sync-in-progress))
- ) ;; end if got lockfile
- )
- ))
- (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
- finalres)
- ) ;; end lambda
- ))
- do-a-sync))
-
+;; (require-extension (srfi 18) extras tcp s11n)
+;;
+;; (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
+;; directory-utils posix-extras matchable utils)
+;;
+;; (use spiffy uri-common intarweb http-client spiffy-request-vars)
+;;
+;; (declare (unit server))
+;;
+;; (declare (uses commonmod))
+;;
+;; (declare (uses common))
+;; (declare (uses db))
+;; (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
+;; ;; (declare (uses synchash))
+;; (declare (uses http-transport))
+;; ;;(declare (uses rpc-transport))
+;; (declare (uses launch))
+;; ;; (declare (uses daemon))
+;;
+;; (import commonmod)
+;;
+;; (include "common_records.scm")
+;; (include "db_records.scm")
+;;
+;; (define (server:make-server-url hostport)
+;; (if (not hostport)
+;; #f
+;; (conc "http://" (car hostport) ":" (cadr hostport))))
+;;
+;; (define *server-loop-heart-beat* (current-seconds))
+;;
+;; ;;======================================================================
+;; ;; P K T S S T U F F
+;; ;;======================================================================
+;;
+;; ;; ???
+;;
+;; ;;======================================================================
+;; ;; P K T S S T U F F
+;; ;;======================================================================
+;;
+;; ;; ???
+;;
+;; ;;======================================================================
+;; ;; S E R V E R
+;; ;;======================================================================
+;;
+;; ;; Call this to start the actual server
+;; ;;
+;;
+;; ;;======================================================================
+;; ;; S E R V E R U T I L I T I E S
+;; ;;======================================================================
+;;
+;; ;; Get the transport
+;; (define (server:get-transport)
+;; (if *transport-type*
+;; *transport-type*
+;; (let ((ttype (string->symbol
+;; (or (args:get-arg "-transport")
+;; (configf:lookup *configdat* "server" "transport")
+;; "rpc"))))
+;; (set! *transport-type* ttype)
+;; ttype)))
+;;
+;; ;; Generate a unique signature for this server
+;; (define (server:mk-signature)
+;; (message-digest-string (md5-primitive)
+;; (with-output-to-string
+;; (lambda ()
+;; (write (list (current-directory)
+;; (current-process-id)
+;; (argv)))))))
+;;
+;; (define (server:get-client-signature)
+;; (if *my-client-signature* *my-client-signature*
+;; (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
+;; (set! *my-client-signature* sig)
+;; *my-client-signature*)))
+;;
+;; (define (server:get-server-id)
+;; (if *server-id* *server-id*
+;; (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
+;; (set! *server-id* sig)
+;; *server-id*)))
+;;
+;; ;; When using zmq this would send the message back (two step process)
+;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;;
+;; (define (server:reply return-addr query-sig success/fail result)
+;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;; ;; (send-message pubsock target send-more: #t)
+;; ;; (send-message pubsock
+;; (case (server:get-transport)
+;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
+;; ((http) (db:obj->string (vector success/fail query-sig result)))
+;; ((fs) result)
+;; (else
+;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;; result)))
+;;
+;; ;; Given an area path, start a server process ### NOTE ### > file 2>&1
+;; ;; if the target-host is set
+;; ;; try running on that host
+;; ;; incidental: rotate logs in logs/ dir.
+;; ;;
+;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+;; (let* ((testsuite (common:get-testsuite-name))
+;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+;; (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
+;; ""))
+;; (cmdln (conc (common:get-megatest-exe)
+;; " -server - ";; (or target-host "-")
+;; (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+;; " -daemonize "
+;; "")
+;; ;; " -log " logfile
+;; " -m testsuite:" testsuite
+;; " " profile-mode
+;; )) ;; (conc " >> " logfile " 2>&1 &")))))
+;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
+;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+;; ;; we want the remote server to start in *toppath* so push there
+;; (push-directory areapath)
+;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+;; (thread-start! log-rotate)
+;;
+;; ;; host.domain.tld match host?
+;; ;; (if (and target-host
+;; ;; ;; look at target host, is it host.domain.tld or ip address and does it
+;; ;; ;; match current ip or hostname
+;; ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+;; ;; (not (equal? curr-ip target-host)))
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+;; ;; (setenv "TARGETHOST" target-host)))
+;; ;;
+;; (setenv "TARGETHOST_LOGF" logfile)
+;; (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
+;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
+;; (system (conc "nbfake " cmdln))
+;; (unsetenv "TARGETHOST_LOGF")
+;; ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+;; (thread-join! log-rotate)
+;; (pop-directory)))
+;;
+;; ;; given a path to a server log return: host port startseconds server-id
+;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
+;; ;; example of what it's looking for in the log file:
+;; ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
+;;
+;; (define (server:logf-get-start-info logf)
+;; (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
+;; (dbprep-rx (regexp "^SERVER: dbprep"))
+;; (dbprep-found 0)
+;; (bad-dat (list #f #f #f #f #f)))
+;; (handle-exceptions
+;; exn
+;; (begin
+;; ;; WARNING: this is potentially dangerous to blanket ignore the errors
+;; (if (file-exists? logf)
+;; (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
+;; bad-dat) ;; no idea what went wrong, call it a bad server
+;; (with-input-from-file
+;; logf
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (lnum 0))
+;; (if (not (eof-object? inl))
+;; (let ((mlst (string-match server-rx inl))
+;; (dbprep (string-match dbprep-rx inl)))
+;; (if dbprep (set! dbprep-found 1))
+;; (if (not mlst)
+;; (if (< lnum 500) ;; give up if more than 500 lines of server log read
+;; (loop (read-line)(+ lnum 1))
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
+;; bad-dat))
+;; (match mlst
+;; ((_ host port start server-id pid)
+;; (list host
+;; (string->number port)
+;; (string->number start)
+;; server-id
+;; (string->number pid)))
+;; (else
+;; (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
+;; bad-dat))))
+;; (begin
+;; (if dbprep-found
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
+;; (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
+;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
+;; bad-dat))))))))
+;;
+;; ;; ;; get a list of servers from the log files, with all relevant data
+;; ;; ;; ( mod-time host port start-time pid )
+;; ;; ;;
+;; ;; (define (server:get-list areapath #!key (limit #f))
+;; ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
+;; ;; (day-seconds (* 24 60 60)))
+;; ;; ;; if the directory exists continue to get the list
+;; ;; ;; otherwise attempt to create the logs dir and then
+;; ;; ;; continue
+;; ;; (if (if (directory-exists? (conc areapath "/logs"))
+;; ;; '()
+;; ;; (if (file-write-access? areapath)
+;; ;; (begin
+;; ;; (condition-case
+;; ;; (create-directory (conc areapath "/logs") #t)
+;; ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
+;; ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
+;; ;; (directory-exists? (conc areapath "/logs")))
+;; ;; '()))
+;; ;;
+;; ;; ;; Get the list of server logs.
+;; ;; (let* (
+;; ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
+;; ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
+;; ;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
+;; ;; (num-serv-logs (length server-logs)))
+;; ;; (if (or (null? server-logs) (= num-serv-logs 0))
+;; ;; (let ()
+;; ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
+;; ;; '()
+;; ;; )
+;; ;; (let loop ((hed (string-chomp (car server-logs)))
+;; ;; (tal (cdr server-logs))
+;; ;; (res '()))
+;; ;; (let* ((mod-time (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
+;; ;; (current-seconds)) ;; 0
+;; ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
+;; ;; (down-time (- (current-seconds) mod-time))
+;; ;; (serv-dat (if (or (< num-serv-logs 10)
+;; ;; (< down-time 900)) ;; day-seconds))
+;; ;; (server:logf-get-start-info hed)
+;; ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
+;; ;; (serv-rec (cons mod-time serv-dat))
+;; ;; (fmatch (string-match fname-rx hed))
+;; ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
+;; ;; (new-res (if (null? serv-dat)
+;; ;; res
+;; ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
+;; ;; (if (null? tal)
+;; ;; (if (and limit
+;; ;; (> (length new-res) limit))
+;; ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
+;; ;; new-res)
+;; ;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
+;;
+;; #;(define (server:get-num-alive srvlst)
+;; (let ((num-alive 0))
+;; (for-each
+;; (lambda (server)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
+;; (match-let (((mod-time host port start-time server-id pid)
+;; server))
+;; (let* ((uptime (- (current-seconds) mod-time))
+;; (runtime (if start-time
+;; (- mod-time start-time)
+;; 0)))
+;; (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
+;; srvlst)
+;; num-alive))
+;;
+;; ;; ;; given a list of servers get a list of valid servers, i.e. at least
+;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is
+;; ;; ;; active (i.e. mod-time < 10 seconds
+;; ;; ;;
+;; ;; ;; mod-time host port start-time pid
+;; ;; ;;
+;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
+;; ;; ;; and servers should stick around for about two hours or so.
+;; ;; ;;
+;; ;; (define (server:get-best srvlst)
+;; ;; (let* ((nums (server:get-num-servers))
+;; ;; (now (current-seconds))
+;; ;; (slst (sort
+;; ;; (filter (lambda (rec)
+;; ;; (if (and (list? rec)
+;; ;; (> (length rec) 2))
+;; ;; (let ((start-time (list-ref rec 3))
+;; ;; (mod-time (list-ref rec 0)))
+;; ;; ;; (print "start-time: " start-time " mod-time: " mod-time)
+;; ;; (and start-time mod-time
+;; ;; (> (- now start-time) 0) ;; been running at least 0 seconds
+;; ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
+;; ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
+;; ;; (< (- now start-time)
+;; ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
+;; ;; 180)
+;; ;; (random 360)))) ;; under one hour running time +/- 180
+;; ;; ))
+;; ;; #f))
+;; ;; srvlst)
+;; ;; (lambda (a b)
+;; ;; (< (list-ref a 3)
+;; ;; (list-ref b 3))))))
+;; ;; (if (> (length slst) nums)
+;; ;; (take slst nums)
+;; ;; slst)))
+;;
+;; ;; ;; switch from server:get-list to server:get-servers-info
+;; ;; ;;
+;; ;; (define (server:get-first-best areapath)
+;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; (if (and srvrs
+;; ;; (not (null? srvrs)))
+;; ;; (car srvrs)
+;; ;; #f)))
+;; ;;
+;; ;; (define (server:get-rand-best areapath)
+;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; (if (and (list? srvrs)
+;; ;; (not (null? srvrs)))
+;; ;; (let* ((len (length srvrs))
+;; ;; (idx (random len)))
+;; ;; (list-ref srvrs idx))
+;; ;; #f)))
+;;
+;; (define (server:record->id servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((host port start-time server-id pid)
+;; servr))
+;; (if server-id
+;; server-id
+;; #f))))
+;;
+;; (define (server:record->url servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((host port start-time server-id pid)
+;; servr))
+;; (if (and host port)
+;; (conc host ":" port)
+;; #f))))
+;;
+;;
+;; ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
+;; ;; if it is old enough, overwrite it and wait 0.25 seconds.
+;; ;; if it then has the wrong server key, wait + 1 and call this function recursively.
+;; ;;
+;; #;(define (server:wait-for-server-start-last-flag areapath)
+;; (let* ((start-flag (conc areapath "/logs/server-start-last"))
+;; ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
+;; (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
+;; (server-key (conc (get-host-name) "-" (current-process-id))))
+;; (if (file-exists? start-flag)
+;; (let* ((fmodtime (file-modification-time start-flag))
+;; (delta (- (current-seconds) fmodtime))
+;; (old-enough (> delta idletime))
+;; (new-server-key ""))
+;; ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t.
+;; ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
+;; (if (and old-enough
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Writing " start-flag)
+;; (with-output-to-file start-flag (lambda () (print server-key)))
+;; (thread-sleep! 0.25)
+;; (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
+;; (equal? server-key new-server-key)))
+;; #t
+;; ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively.
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Gating server start, last start: "
+;; (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
+;;
+;; (thread-sleep! ( + 1 idletime))
+;; (server:wait-for-server-start-last-flag areapath)))))))
+;;
+;; ;; oldest server alive determines host then choose random of youngest
+;; ;; five servers on that host
+;; ;;
+;; (define (server:get-servers-info areapath)
+;; ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
+;; (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
+;; (if (not (file-exists? servinfodir))
+;; (create-directory servinfodir))
+;; (let* ((allfiles (glob (conc servinfodir"/*")))
+;; (res (make-hash-table)))
+;; (for-each
+;; (lambda (f)
+;; (let* ((hostport (pathname-strip-directory f))
+;; (serverdat (server:logf-get-start-info f)))
+;; (match serverdat
+;; ((host port start server-id pid)
+;; (if (and host port start server-id pid)
+;; (hash-table-set! res hostport serverdat)
+;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
+;; (else
+;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
+;; allfiles)
+;; res)))
+;;
+;; ;; check the .servinfo directory, are there other servers running on this
+;; ;; or another host?
+;; ;;
+;; ;; returns #t => ok to start another server
+;; ;; #f => not ok to start another server
+;; ;;
+;; (define (server:minimal-check areapath)
+;; (server:clean-up-old areapath)
+;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
+;; (servrs (glob (conc srvdir"/*")))
+;; (thishostip (server:get-best-guess-address (get-host-name)))
+;; (thisservrs (glob (conc srvdir"/"thishostip":*")))
+;; (homehostinf (server:choose-server areapath 'homehost))
+;; (havehome (car homehostinf))
+;; (wearehome (cdr homehostinf)))
+;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
+;; ", numservers: "(length thisservrs))
+;; (cond
+;; ((not havehome) #t) ;; no homehost yet, go for it
+;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
+;; ((and havehome (not wearehome)) #f) ;; we are not the home host
+;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
+;; (else
+;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
+;; #t))))
+;;
+;;
+;; (define server-last-start 0)
+;;
+;;
+;; ;; oldest server alive determines host then choose random of youngest
+;; ;; five servers on that host
+;; ;;
+;; ;; mode:
+;; ;; best - get best server (random of newest five)
+;; ;; home - get home host based on oldest server
+;; ;; info - print info
+;; (define (server:choose-server areapath #!optional (mode 'best))
+;; ;; age is current-starttime
+;; ;; find oldest alive
+;; ;; 1. sort by age ascending and ping until good
+;; ;; find alive rand from youngest
+;; ;; 1. sort by age descending
+;; ;; 2. take five
+;; ;; 3. check alive, discard if not and repeat
+;; ;; first we clean up old server files
+;; (server:clean-up-old areapath)
+;; (let* ((since-last (- (current-seconds) server-last-start))
+;; (server-start-delay 10))
+;; (if ( < (- (current-seconds) server-last-start) 10 )
+;; (begin
+;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
+;; (thread-sleep! server-start-delay)
+;; )
+;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; )
+;; )
+;; (let* ((serversdat (server:get-servers-info areapath))
+;; (servkeys (hash-table-keys serversdat))
+;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
+;; (sort servkeys ;; list of "host:port"
+;; (lambda (a b)
+;; (>= (list-ref (hash-table-ref serversdat a) 2)
+;; (list-ref (hash-table-ref serversdat b) 2))))
+;; '())))
+;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
+;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
+;; (if (not (null? by-time-asc))
+;; (let* ((oldest (last by-time-asc))
+;; (oldest-dat (hash-table-ref serversdat oldest))
+;; (host (list-ref oldest-dat 0))
+;; (all-valid (filter (lambda (x)
+;; (equal? host (list-ref (hash-table-ref serversdat x) 0)))
+;; by-time-asc))
+;; (best-ten (lambda ()
+;; (if (> (length all-valid) 11)
+;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
+;; (if (> (length all-valid) 8)
+;; (drop-right all-valid 1)
+;; all-valid))))
+;; (names->dats (lambda (names)
+;; (map (lambda (x)
+;; (hash-table-ref serversdat x))
+;; names)))
+;; (am-home? (lambda ()
+;; (let* ((currhost (get-host-name))
+;; (bestadrs (server:get-best-guess-address currhost)))
+;; (or (equal? host currhost)
+;; (equal? host bestadrs))))))
+;; (case mode
+;; ((info)
+;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+;; (print "youngest: "(hash-table-ref serversdat (car all-valid))))
+;; ((home) host)
+;; ((homehost) (cons host (am-home?))) ;; shut up old code
+;; ((home?) (am-home?))
+;; ((best-ten)(names->dats (best-ten)))
+;; ((all-valid)(names->dats all-valid))
+;; ((best) (let* ((best-ten (best-ten))
+;; (len (length best-ten)))
+;; (hash-table-ref serversdat (list-ref best-ten (random len)))))
+;; ((count)(length all-valid))
+;; (else
+;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
+;; #f)))
+;; (begin
+;; (server:run areapath)
+;; (set! server-last-start (current-seconds))
+;; ;; (thread-sleep! 3)
+;; (case mode
+;; ((homehost) (cons #f #f))
+;; (else #f))))))
+;;
+;; (define (server:get-servinfo-dir areapath)
+;; (let* ((spath (conc areapath"/.servinfo")))
+;; (if (not (file-exists? spath))
+;; (create-directory spath #t))
+;; spath))
+;;
+;; (define (server:clean-up-old areapath)
+;; ;; any server file that has not been touched in ten minutes is effectively dead
+;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
+;; (for-each
+;; (lambda (sfile)
+;; (let* ((modtime (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
+;; (current-seconds))
+;; (file-modification-time sfile))))
+;; (if (and (number? modtime)
+;; (> (- (current-seconds) modtime)
+;; 600))
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
+;; (delete-file sfile))))))
+;; sfiles)))
+;;
+;; ;; would like to eventually get rid of this
+;; ;;
+;; (define (common:on-homehost?)
+;; (server:choose-server *toppath* 'home?))
+;;
+;; ;; kind start up of server, wait before allowing another server for a given
+;; ;; area to be launched
+;; ;;
+;; (define (server:kind-run areapath)
+;; ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
+;; ;; and wait for it to be at least seconds old
+;; ;; (server:wait-for-server-start-last-flag areapath)
+;; (let loop ()
+;; (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
+;; (begin
+;; (if (common:low-noise-print 30 "our-host-load")
+;; (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
+;; (loop))))
+;; (if (< (server:choose-server areapath 'count) 20)
+;; (server:run areapath))
+;; #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+;; (let* ((lock-file (conc areapath "/logs/server-start.lock")))
+;; (let* ((start-flag (conc areapath "/logs/server-start-last")))
+;; (common:simple-file-lock-and-wait lock-file expire-time: 25)
+;; (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
+;; (system (conc "touch " start-flag)) ;; lazy but safe
+;; (server:run areapath)
+;; (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
+;; (common:simple-file-release-lock lock-file)))
+;; (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
+;;
+;; ;; this one seems to be the general entry point
+;; ;;
+;; (define (server:start-and-wait areapath #!key (timeout 60))
+;; (let ((give-up-time (+ (current-seconds) timeout)))
+;; (let loop ((server-info (server:check-if-running areapath))
+;; (try-num 0))
+;; (if (or server-info
+;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
+;; (server:record->url server-info)
+;; (let* ( (servers (server:choose-server areapath 'all-valid))
+;; (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
+;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again
+;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
+;; (server:run areapath))
+;; (thread-sleep! 5)
+;; (loop (server:check-if-running areapath)
+;; (+ try-num 1)))))))
+;;
+;; (define (server:get-num-servers #!key (numservers 2))
+;; (let ((ns (string->number
+;; (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
+;; (or ns numservers)))
+;;
+;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;; ;;
+;; (define (server:check-if-running areapath) ;; #!key (numservers "2"))
+;; (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
+;; (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
+;; (if (or (and servers
+;; (null? servers))
+;; (not servers))
+;; ;; (and (list? servers)
+;; ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
+;; #f
+;; (let loop ((hed (car servers))
+;; (tal (cdr servers)))
+;; (let ((res (server:check-server hed)))
+;; (if res
+;; hed
+;; (if (null? tal)
+;; #f
+;; (loop (car tal)(cdr tal)))))))))
+;;
+;; ;; ping the given server
+;; ;;
+;; (define (server:check-server server-record)
+;; (let* ((server-url (server:record->url server-record))
+;; (server-id (server:record->id server-record))
+;; (res (server:ping server-url server-id)))
+;; (if res
+;; server-url
+;; #f)))
+;;
+;; (define (server:kill servr)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
+;; #f)
+;; (match-let (((mod-time hostname port start-time server-id pid)
+;; servr))
+;; (tasks:kill-server hostname pid))))
+;;
+;; ;; called in megatest.scm, host-port is string hostname:port
+;; ;;
+;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;; ;; in the same process as the server.
+;; ;;
+;; (define (server:ping host:port server-id #!key (do-exit #f))
+;; (let* ((host-port (cond
+;; ((string? host:port)
+;; (let ((slst (string-split host:port ":")))
+;; (if (eq? (length slst) 2)
+;; (list (car slst)(string->number (cadr slst)))
+;; #f)))
+;; (else
+;; #f))))
+;; (cond
+;; ((and (list? host-port)
+;; (eq? (length host-port) 2))
+;; (let* ((myrunremote (make-remote))
+;; (iface (car host-port))
+;; (port (cadr host-port))
+;; (server-dat (client:connect iface port server-id myrunremote))
+;; (login-res (rmt:login-no-auto-client-setup myrunremote)))
+;; (if (and (list? login-res)
+;; (car login-res))
+;; (begin
+;; ;; (print "LOGIN_OK")
+;; (if do-exit (exit 0))
+;; #t)
+;; (begin
+;; ;; (print "LOGIN_FAILED")
+;; (if do-exit (exit 1))
+;; #f))))
+;; (else
+;; (if host:port
+;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
+;; (if do-exit
+;; (exit 1)
+;; #f)))))
+;;
+;; ;; run ping in separate process, safest way in some cases
+;; ;;
+;; (define (server:ping-server ifaceport)
+;; (with-input-from-pipe
+;; (conc (common:get-megatest-exe) " -ping " ifaceport)
+;; (lambda ()
+;; (let loop ((inl (read-line))
+;; (res "NOREPLY"))
+;; (if (eof-object? inl)
+;; (case (string->symbol res)
+;; ((NOREPLY) #f)
+;; ((LOGIN_OK) #t)
+;; (else #f))
+;; (loop (read-line) inl))))))
+;;
+;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;; ;;
+;; (define (server:login toppath)
+;; (lambda (toppath)
+;; (set! *db-last-access* (current-seconds)) ;; might not be needed.
+;; (if (equal? *toppath* toppath)
+;; #t
+;; #f)))
+;;
+;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute
+;; ;; This is currently broken. Just use the number of hours with no unit.
+;; ;; Default is 60 seconds.
+;; ;;
+;; (define (server:expiration-timeout)
+;; (let ((tmo (configf:lookup *configdat* "server" "timeout")))
+;; (if (and (string? tmo)
+;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
+;; (* 3600 (string->number tmo))
+;; 600)))
+;;
+;; (define (server:get-best-guess-address hostname)
+;; (let ((res #f))
+;; (for-each
+;; (lambda (adr)
+;; (if (not (eq? (u8vector-ref adr 0) 127))
+;; (set! res adr)))
+;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+;; (string-intersperse
+;; (map number->string
+;; (u8vector->list
+;; (if res res (hostname->ip hostname)))) ".")))
+;;
+;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; ;; (define (server:release-sync-lock)
+;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; ;; (define (server:have-sync-lock?)
+;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;; ;; (have-lock? (car have-lock-pair))
+;; ;; (lock-time (cdr have-lock-pair))
+;; ;; (lock-age (- (current-seconds) lock-time)))
+;; ;; (cond
+;; ;; (have-lock? #t)
+;; ;; ((>lock-age
+;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;; ;; (server:release-sync-lock)
+;; ;; (server:have-sync-lock?))
+;; ;; (else #f))))
+;;
+;; ;; moving this here as it needs access to db and cannot be in common.
+;; ;;
+;;
+;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
+;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
+;; (lambda ()
+;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))
+;; #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
+;; (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
+;; (tmp-area (common:get-db-tmp-area))
+;; (tmp-db (conc tmp-area "/megatest.db"))
+;; (staging-file (conc *toppath* "/.megatest.db"))
+;; (mtdbfile (conc *toppath* "/megatest.db"))
+;; (lockfile (common:get-sync-lock-filepath))
+;; (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
+;; (sync-cmd (if fork-to-background
+;; (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
+;; sync-cmd-core))
+;; (default-min-intersync-delay 2)
+;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
+;; (default-duty-cycle 0.1)
+;; (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
+;; (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
+;; (calculate-off-time (lambda (work-duration duty-cycle)
+;; (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
+;; (off-time min-intersync-delay) ;; adjusted in closure below.
+;; (do-a-sync
+;; (lambda ()
+;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
+;; (let* ((finalres
+;; (let retry-loop ((num-tries 0))
+;; (if (common:simple-file-lock lockfile)
+;; (begin
+;; (cond
+;; ((not (or fork-to-background persist-until-sync))
+;; (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
+;; " , off-time="off-time" seconds ]")
+;; (thread-sleep! (max off-time min-intersync-delay)))
+;; (else
+;; (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
+;;
+;; (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
+;; (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
+;; (delete-file* staging-file)
+;; (let* ((start-time (current-milliseconds))
+;; (res (system sync-cmd))
+;; (dbbackupfile (conc mtdbfile ".backup"))
+;; (res2
+;; (cond
+;; ((eq? 0 res )
+;; (handle-exceptions
+;; exn
+;; #f
+;; (if (file-exists? dbbackupfile)
+;; (delete-file* dbbackupfile)
+;; )
+;; (if (eq? 0 (file-size sync-log))
+;; (delete-file* sync-log))
+;; (system (conc "/bin/mv " staging-file " " mtdbfile))
+;;
+;; (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
+;; (set! off-time (calculate-off-time
+;; last-sync-seconds
+;; (cond
+;; ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
+;; duty-cycle)
+;; (else
+;; (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
+;; default-duty-cycle))))
+;;
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
+;; 'sync-completed))
+;; (else
+;; (system (conc "/bin/cp "sync-log" "sync-log".fail"))
+;; (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
+;; (if (file-exists? (conc mtdbfile ".backup"))
+;; (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
+;; #f))))
+;; (common:simple-file-release-lock lockfile)
+;; (BB> "released lockfile: " lockfile)
+;; (when (common:file-exists? lockfile)
+;; (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
+;; res2) ;; end let
+;; );; end begin
+;; ;; else
+;; (cond
+;; (persist-until-sync
+;; (thread-sleep! 1)
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
+;; (retry-loop (add1 num-tries)))
+;; (else
+;; (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
+;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
+;; 'parallel-sync-in-progress))
+;; ) ;; end if got lockfile
+;; )
+;; ))
+;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
+;; finalres)
+;; ) ;; end lambda
+;; ))
+;; do-a-sync))
+;;
+;;
ADDED servermod.scm
Index: servermod.scm
==================================================================
--- /dev/null
+++ servermod.scm
@@ -0,0 +1,1111 @@
+;; Copyright 2006-2023, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+;;======================================================================
+
+(declare (unit servermod))
+(declare (uses artifacts))
+
+(use md5 message-digest posix typed-records extras)
+
+(module servermod
+*
+
+(import scheme
+ chicken
+
+ extras
+ md5
+ message-digest
+ ports
+ posix
+ srfi-18
+
+ typed-records
+ data-structures
+
+ artifacts
+ )
+
+(defstruct srv
+ (areapath #f)
+ (host #f)
+ (pid #f)
+ (type #f)
+ (sdir #f) ;; .server directory
+ (hdir #f) ;; .server/host.pid directory
+ (incoming #f)
+ (dbstruct #f)
+ (handler #f)
+ (obj-to-str #f)
+ (str-to-obj #f)
+ )
+
+;; nearly every process in Megatest (if write access) starts a server so it
+;; can receive messages to exit on request
+;; servers have a type, mtserve, dboard, runner, execute? TOO COMPLICATED.
+
+;; one server per run db file would be ideal.
+
+;; mtrah/.servers/./incoming/*.artifact
+;; | `attic
+;; |
+;; `outgoing/./*.artifact
+;; | `attic
+;; `.host:port
+
+;; on exit processes clean up. only mtserv or dboard clean up abandoned records?
+
+;; IDEA: All requests could go into one directory instead of server specific directory - need locking
+;; don't get multiple processing of arfs
+
+;; server:setup - setup the directory
+;; server:launch - start a new mtserve process, possibly
+;; using a launcher
+;; server:run - run the long running thread that monitors
+;; the .server area
+;; server:exit - shutdown the server and exit
+;; server:handle-request - take incoming request, process it, send response
+;; back via best or fastest available transport
+
+;; call this with handler that takes dbstruct cmd and params after doing server:setup
+;; and before starting server:run
+;;
+(define (server:set-handler srvdat handler)
+ (srv-handler-set! srvdat handler))
+
+;; set up the server area and return a server struct
+;; NOTE: This will need to be gated by write-access
+;;
+(define (server:setup areapath)
+ (let* ((srvdat (make-srv
+ areapath: areapath
+ host: (get-host-name) ;; likely need to replace with ip address
+ pid: (current-process-id)
+ sdir: (conc areapath"/.server") ;; put server artifacts here
+ ))
+ (hdir (conc (srv-sdir srvdat)"/"(get-host.pid srvdat))))
+ (srv-hdir-set! srvdat hdir)
+ (srv-incoming-set! srvdat (conc hdir"/incoming"))
+ (create-directory hdir #t)
+ (for-each (lambda (d)
+ (create-directory (conc hdir"/"d)))
+ '("incoming" "responses"))
+ srvdat))
+
+(define *server-keep-running* #f)
+
+;; to cleanly shut the server down set *server-keep-running* to #f
+;;
+(define (server:run srvdat)
+ ;; create server arf
+ ;; put arf in srvdat-dir
+ ;; forever
+ ;; scan incoming dir
+ ;; foreach arf
+ ;; bundle into with-transaction, no-transaction
+ ;; foreach bundle
+ ;; process the request
+ ;; create results arf and write it to clients dir
+ ;; remove in-arf from incoming
+ (let* ((areapath (srv-areapath srvdat))
+ (sdir (srv-sdir srvdat))
+ (hdir (srv-hdir srvdat))
+ (myarf `((h . ,(srv-host srvdat))
+ (i . ,(srv-pid srvdat))
+ (d . ,hdir)))
+ (myuuid (write-alist->artifact sdir myarf ptype: 'S))
+ (arf-fname (get-artifact-fname sdir myuuid))
+ (dbstruct (srv-dbstruct srvdat)))
+ (set! *server-keep-running* #t)
+ (let loop ((last-access (current-seconds)))
+ (let* ((start (current-milliseconds))
+ (res (server:process-incoming srvdat))
+ (delta (- (current-milliseconds) start))
+ (timed-out (> (- (current-seconds) last-access)
+ 60))) ;; accessed in last 60 seconds
+ (if timed-out
+ (begin
+ (print "INFO: server has not been accessed in 60 seconds, exiting shortly.")
+ (set! *server-keep-running* #f))
+ (thread-sleep! (if (> delta 500)
+ 0.1
+ 0.9)))
+ (if (or (> res 0) ;; res is the number of requests that were found and processed
+ *server-keep-running*)
+ (loop (if (> res 0)
+ (current-seconds)
+ last-access)
+ ))))
+ (delete-file arf-fname)
+ ))
+
+;; read arfs from incoming, process them and put result arfs in proper dirs
+;; return number requests found and processed
+;;
+(define (server:process-incoming srvdat)
+ (let* ((sdir (srv-sdir srvdat))
+ (hdir (srv-hdir srvdat))
+ (indir (srv-incoming srvdat))
+ (arfs (glob (conc indir"/*.artifacts")))
+ (handler (srv-handler srvdat))
+ (obj->string (srv-obj-to-str srvdat))
+ (dbstruct (srv-dbstruct srvdat)))
+ (let loop ((rem arfs))
+ (if (not (null? arfs))
+ (let* ((arf (car rem))
+ (dat (read-artifact->alist arf))
+ (ruuid (alist-ref 'Z dat))
+ (host (alist-ref 'h dat))
+ (pid (alist-ref 'i dat))
+ (dest (conc sdir"/"host"."pid"/responses")) ;; the calling host area
+ (cmd (alist-ref 'c dat))
+ (params (alist-ref 'p dat))
+ (res (handler dbstruct cmd params))
+ (narf `((r . ,(obj->string res))
+ (P . ,ruuid))))
+ (delete-file arf) ;; add ability to save in bundles in archive area
+ (write-alist->artifact dest narf ptype: 'Q)
+ (loop (cdr rem)))))
+ (length arfs)))
+
+;; start a server process (NOT start server in this process)
+;;
+;; maybe check load before calling this?
+(define (server:launch areapath)
+ (let* ((logd (conc areapath"/logs"))
+ (logf (conc logd"/from-"(get-host.pid #f)".log")))
+ (if (not (file-exists? logd))(create-directory logd #t))
+ (setenv "NBFAKE_LOG" logf)
+ (system (conc "nbfake mtserve -start-dir "areapath))))
+
+
+
+;;======================================================================
+;; OLD SERVER STUFF BELOW HERE
+;;======================================================================
+
+;; ;; servers start by setting up fs transport
+;; ;; and put a flag file for that ASAP.
+;; ;; they then set up tcp and put a flag file for
+;; ;; that
+;; ;;
+;; (define *client-server-id* #f)
+;;
+;; ;; oldest server alive determines host then choose random of youngest
+;; ;; five servers on that host
+;; ;;
+;; ;; mode:
+;; ;; best - get best server (random of newest five)
+;; ;; home - get home host based on oldest server
+;; ;; info - print info
+;; (define (server:choose-server areapath #!optional (mode 'best))
+;; ;; age is current-starttime
+;; ;; find oldest alive
+;; ;; 1. sort by age ascending and ping until good
+;; ;; find alive rand from youngest
+;; ;; 1. sort by age descending
+;; ;; 2. take five
+;; ;; 3. check alive, discard if not and repeat
+;; ;; first we clean up old server files
+;; (server:clean-up-old areapath)
+;; ;; (let* ((since-last (- (current-seconds) server-last-start))
+;; ;; (server-start-delay 10))
+;; ;; (if ( < (- (current-seconds) server-last-start) 10 )
+;; ;; (begin
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
+;; ;; (thread-sleep! server-start-delay)
+;; ;; )
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; ;; )
+;; (let* ((serversdat (server:get-servers-info areapath))
+;; (servkeys (hash-table-keys serversdat))
+;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
+;; (sort servkeys ;; list of "host:port"
+;; (lambda (a b)
+;; (>= (list-ref (hash-table-ref serversdat a) 2)
+;; (list-ref (hash-table-ref serversdat b) 2))))
+;; '())))
+;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
+;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
+;; (if (not (null? by-time-asc))
+;; (let* ((oldest (last by-time-asc))
+;; (oldest-dat (hash-table-ref serversdat oldest))
+;; (host (list-ref oldest-dat 0))
+;; (all-valid (filter (lambda (x)
+;; (equal? host (list-ref (hash-table-ref serversdat x) 0)))
+;; by-time-asc))
+;; (best-ten (lambda ()
+;; (if (> (length all-valid) 11)
+;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
+;; (if (> (length all-valid) 8)
+;; (drop-right all-valid 1)
+;; all-valid))))
+;; (names->dats (lambda (names)
+;; (map (lambda (x)
+;; (hash-table-ref serversdat x))
+;; names)))
+;; (am-home? (lambda ()
+;; (let* ((currhost (get-host-name))
+;; (bestadrs (server:get-best-guess-address currhost)))
+;; (or (equal? host currhost)
+;; (equal? host bestadrs))))))
+;; (case mode
+;; ((info)
+;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+;; (print "youngest: "(hash-table-ref serversdat (car all-valid))))
+;; ((home) host)
+;; ((homehost) (cons host (am-home?))) ;; shut up old code
+;; ((home?) (am-home?))
+;; ((best-ten)(names->dats (best-ten)))
+;; ((all-valid)(names->dats all-valid))
+;; ((best) (let* ((best-ten (best-ten))
+;; (len (length best-ten)))
+;; (hash-table-ref serversdat (list-ref best-ten (random len)))))
+;; ((count)(length all-valid))
+;; (else
+;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
+;; #f)))
+;; (begin
+;; (server:run areapath)
+;; (set! server-last-start (current-seconds))
+;; ;; (thread-sleep! 3)
+;; (case mode
+;; ((homehost) (cons #f #f))
+;; (else #f))))))
+
+;;======================================================================
+;; S E R V E R U T I L I T I E S
+;;======================================================================
+
+(define (server:get-servinfo-dir areapath)
+ (let* ((spath (conc areapath"/.servinfo")))
+ (if (not (file-exists? spath))
+ (create-directory spath #t))
+ spath))
+
+;; ;; Generate a unique signature for this server
+;; (define (mk-signature)
+;; (message-digest-string (md5-primitive)
+;; (with-output-to-string
+;; (lambda ()
+;; (write (list (current-directory)
+;; (current-process-id)
+;; (argv)))))))
+;;
+;; (define (server:clean-up-old areapath)
+;; ;; any server file that has not been touched in ten minutes is effectively dead
+;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
+;; (for-each
+;; (lambda (sfile)
+;; (let* ((modtime (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
+;; (current-seconds))
+;; (file-modification-time sfile))))
+;; (if (and (number? modtime)
+;; (> (- (current-seconds) modtime)
+;; 600))
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
+;; (handle-exceptions
+;; exn
+;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
+;; (delete-file sfile))))))
+;; sfiles)))
+;;
+;; (define (get-client-server-id)
+;; (if *client-server-id* *client-server-id*
+;; (let ((sig (mk-signature))) ;; clients re-use the server:mk-signature logic
+;; (set! *client-server-id* sig)
+;; *client-server-id*)))
+
+;; if srvdat is #f calculate host.pid
+(define (get-host.pid srvdat)
+ (if srvdat
+ (conc (srv-host srvdat)"."(srv-pid srvdat))
+ (conc (get-host-name)"."(current-process-id))))
+
+;; ;; ;; When using zmq this would send the message back (two step process)
+;; ;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;; ;;
+;; ;; (define (server:reply return-addr query-sig success/fail result)
+;; ;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;; ;; ;; (send-message pubsock target send-more: #t)
+;; ;; ;; (send-message pubsock
+;; ;; (case (server:get-transport)
+;; ;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
+;; ;; ((http) (db:obj->string (vector success/fail query-sig result)))
+;; ;; ((fs) result)
+;; ;; (else
+;; ;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;; ;; result)))
+;; ;;
+;; ;; ;; Given an area path, start a server process ### NOTE ### > file 2>&1
+;; ;; ;; if the target-host is set
+;; ;; ;; try running on that host
+;; ;; ;; incidental: rotate logs in logs/ dir.
+;; ;; ;;
+;; ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+;; ;; (let* ((testsuite (common:get-testsuite-name))
+;; ;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+;; ;; (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
+;; ;; ""))
+;; ;; (cmdln (conc (common:get-megatest-exe)
+;; ;; " -server - ";; (or target-host "-")
+;; ;; (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+;; ;; " -daemonize "
+;; ;; "")
+;; ;; ;; " -log " logfile
+;; ;; " -m testsuite:" testsuite
+;; ;; " " profile-mode
+;; ;; )) ;; (conc " >> " logfile " 2>&1 &")))))
+;; ;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
+;; ;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+;; ;; ;; we want the remote server to start in *toppath* so push there
+;; ;; (push-directory areapath)
+;; ;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+;; ;; (thread-start! log-rotate)
+;; ;;
+;; ;; ;; host.domain.tld match host?
+;; ;; ;; (if (and target-host
+;; ;; ;; ;; look at target host, is it host.domain.tld or ip address and does it
+;; ;; ;; ;; match current ip or hostname
+;; ;; ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+;; ;; ;; (not (equal? curr-ip target-host)))
+;; ;; ;; (begin
+;; ;; ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+;; ;; ;; (setenv "TARGETHOST" target-host)))
+;; ;; ;;
+;; ;; (setenv "TARGETHOST_LOGF" logfile)
+;; ;; (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
+;; ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
+;; ;; (system (conc "nbfake " cmdln))
+;; ;; (unsetenv "TARGETHOST_LOGF")
+;; ;; ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+;; ;; (thread-join! log-rotate)
+;; ;; (pop-directory)))
+;; ;;
+;; ;; ;; given a path to a server log return: host port startseconds server-id
+;; ;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
+;; ;; ;; example of what it's looking for in the log file:
+;; ;; ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
+;; ;;
+;; ;; (define (server:logf-get-start-info logf)
+;; ;; (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
+;; ;; (dbprep-rx (regexp "^SERVER: dbprep"))
+;; ;; (dbprep-found 0)
+;; ;; (bad-dat (list #f #f #f #f #f)))
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; ;; WARNING: this is potentially dangerous to blanket ignore the errors
+;; ;; (if (file-exists? logf)
+;; ;; (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
+;; ;; bad-dat) ;; no idea what went wrong, call it a bad server
+;; ;; (with-input-from-file
+;; ;; logf
+;; ;; (lambda ()
+;; ;; (let loop ((inl (read-line))
+;; ;; (lnum 0))
+;; ;; (if (not (eof-object? inl))
+;; ;; (let ((mlst (string-match server-rx inl))
+;; ;; (dbprep (string-match dbprep-rx inl)))
+;; ;; (if dbprep (set! dbprep-found 1))
+;; ;; (if (not mlst)
+;; ;; (if (< lnum 500) ;; give up if more than 500 lines of server log read
+;; ;; (loop (read-line)(+ lnum 1))
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
+;; ;; bad-dat))
+;; ;; (match mlst
+;; ;; ((_ host port start server-id pid)
+;; ;; (list host
+;; ;; (string->number port)
+;; ;; (string->number start)
+;; ;; server-id
+;; ;; (string->number pid)))
+;; ;; (else
+;; ;; (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
+;; ;; bad-dat))))
+;; ;; (begin
+;; ;; (if dbprep-found
+;; ;; (begin
+;; ;; (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
+;; ;; (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
+;; ;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
+;; ;; bad-dat))))))))
+;; ;;
+;; ;; ;; ;; get a list of servers from the log files, with all relevant data
+;; ;; ;; ;; ( mod-time host port start-time pid )
+;; ;; ;; ;;
+;; ;; ;; (define (server:get-list areapath #!key (limit #f))
+;; ;; ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
+;; ;; ;; (day-seconds (* 24 60 60)))
+;; ;; ;; ;; if the directory exists continue to get the list
+;; ;; ;; ;; otherwise attempt to create the logs dir and then
+;; ;; ;; ;; continue
+;; ;; ;; (if (if (directory-exists? (conc areapath "/logs"))
+;; ;; ;; '()
+;; ;; ;; (if (file-write-access? areapath)
+;; ;; ;; (begin
+;; ;; ;; (condition-case
+;; ;; ;; (create-directory (conc areapath "/logs") #t)
+;; ;; ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
+;; ;; ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
+;; ;; ;; (directory-exists? (conc areapath "/logs")))
+;; ;; ;; '()))
+;; ;; ;;
+;; ;; ;; ;; Get the list of server logs.
+;; ;; ;; (let* (
+;; ;; ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
+;; ;; ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
+;; ;; ;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
+;; ;; ;; (num-serv-logs (length server-logs)))
+;; ;; ;; (if (or (null? server-logs) (= num-serv-logs 0))
+;; ;; ;; (let ()
+;; ;; ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
+;; ;; ;; '()
+;; ;; ;; )
+;; ;; ;; (let loop ((hed (string-chomp (car server-logs)))
+;; ;; ;; (tal (cdr server-logs))
+;; ;; ;; (res '()))
+;; ;; ;; (let* ((mod-time (handle-exceptions
+;; ;; ;; exn
+;; ;; ;; (begin
+;; ;; ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
+;; ;; ;; (current-seconds)) ;; 0
+;; ;; ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
+;; ;; ;; (down-time (- (current-seconds) mod-time))
+;; ;; ;; (serv-dat (if (or (< num-serv-logs 10)
+;; ;; ;; (< down-time 900)) ;; day-seconds))
+;; ;; ;; (server:logf-get-start-info hed)
+;; ;; ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
+;; ;; ;; (serv-rec (cons mod-time serv-dat))
+;; ;; ;; (fmatch (string-match fname-rx hed))
+;; ;; ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
+;; ;; ;; (new-res (if (null? serv-dat)
+;; ;; ;; res
+;; ;; ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
+;; ;; ;; (if (null? tal)
+;; ;; ;; (if (and limit
+;; ;; ;; (> (length new-res) limit))
+;; ;; ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
+;; ;; ;; new-res)
+;; ;; ;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
+;; ;;
+;; ;; #;(define (server:get-num-alive srvlst)
+;; ;; (let ((num-alive 0))
+;; ;; (for-each
+;; ;; (lambda (server)
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
+;; ;; (match-let (((mod-time host port start-time server-id pid)
+;; ;; server))
+;; ;; (let* ((uptime (- (current-seconds) mod-time))
+;; ;; (runtime (if start-time
+;; ;; (- mod-time start-time)
+;; ;; 0)))
+;; ;; (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
+;; ;; srvlst)
+;; ;; num-alive))
+;; ;;
+;; ;; ;; ;; given a list of servers get a list of valid servers, i.e. at least
+;; ;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is
+;; ;; ;; ;; active (i.e. mod-time < 10 seconds
+;; ;; ;; ;;
+;; ;; ;; ;; mod-time host port start-time pid
+;; ;; ;; ;;
+;; ;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
+;; ;; ;; ;; and servers should stick around for about two hours or so.
+;; ;; ;; ;;
+;; ;; ;; (define (server:get-best srvlst)
+;; ;; ;; (let* ((nums (server:get-num-servers))
+;; ;; ;; (now (current-seconds))
+;; ;; ;; (slst (sort
+;; ;; ;; (filter (lambda (rec)
+;; ;; ;; (if (and (list? rec)
+;; ;; ;; (> (length rec) 2))
+;; ;; ;; (let ((start-time (list-ref rec 3))
+;; ;; ;; (mod-time (list-ref rec 0)))
+;; ;; ;; ;; (print "start-time: " start-time " mod-time: " mod-time)
+;; ;; ;; (and start-time mod-time
+;; ;; ;; (> (- now start-time) 0) ;; been running at least 0 seconds
+;; ;; ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
+;; ;; ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
+;; ;; ;; (< (- now start-time)
+;; ;; ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
+;; ;; ;; 180)
+;; ;; ;; (random 360)))) ;; under one hour running time +/- 180
+;; ;; ;; ))
+;; ;; ;; #f))
+;; ;; ;; srvlst)
+;; ;; ;; (lambda (a b)
+;; ;; ;; (< (list-ref a 3)
+;; ;; ;; (list-ref b 3))))))
+;; ;; ;; (if (> (length slst) nums)
+;; ;; ;; (take slst nums)
+;; ;; ;; slst)))
+;; ;;
+;; ;; ;; ;; switch from server:get-list to server:get-servers-info
+;; ;; ;; ;;
+;; ;; ;; (define (server:get-first-best areapath)
+;; ;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; ;; (if (and srvrs
+;; ;; ;; (not (null? srvrs)))
+;; ;; ;; (car srvrs)
+;; ;; ;; #f)))
+;; ;; ;;
+;; ;; ;; (define (server:get-rand-best areapath)
+;; ;; ;; (let ((srvrs (server:get-best (server:get-list areapath))))
+;; ;; ;; (if (and (list? srvrs)
+;; ;; ;; (not (null? srvrs)))
+;; ;; ;; (let* ((len (length srvrs))
+;; ;; ;; (idx (random len)))
+;; ;; ;; (list-ref srvrs idx))
+;; ;; ;; #f)))
+;; ;;
+;; ;; (define (server:record->id servr)
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
+;; ;; #f)
+;; ;; (match-let (((host port start-time server-id pid)
+;; ;; servr))
+;; ;; (if server-id
+;; ;; server-id
+;; ;; #f))))
+;; ;;
+;; ;; (define (server:record->url servr)
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
+;; ;; #f)
+;; ;; (match-let (((host port start-time server-id pid)
+;; ;; servr))
+;; ;; (if (and host port)
+;; ;; (conc host ":" port)
+;; ;; #f))))
+;; ;;
+;; ;;
+;; ;; ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
+;; ;; ;; if it is old enough, overwrite it and wait 0.25 seconds.
+;; ;; ;; if it then has the wrong server key, wait + 1 and call this function recursively.
+;; ;; ;;
+;; ;; #;(define (server:wait-for-server-start-last-flag areapath)
+;; ;; (let* ((start-flag (conc areapath "/logs/server-start-last"))
+;; ;; ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
+;; ;; (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
+;; ;; (server-key (conc (get-host-name) "-" (current-process-id))))
+;; ;; (if (file-exists? start-flag)
+;; ;; (let* ((fmodtime (file-modification-time start-flag))
+;; ;; (delta (- (current-seconds) fmodtime))
+;; ;; (old-enough (> delta idletime))
+;; ;; (new-server-key ""))
+;; ;; ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t.
+;; ;; ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
+;; ;; (if (and old-enough
+;; ;; (begin
+;; ;; (debug:print-info 2 *default-log-port* "Writing " start-flag)
+;; ;; (with-output-to-file start-flag (lambda () (print server-key)))
+;; ;; (thread-sleep! 0.25)
+;; ;; (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
+;; ;; (equal? server-key new-server-key)))
+;; ;; #t
+;; ;; ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively.
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Gating server start, last start: "
+;; ;; (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
+;; ;;
+;; ;; (thread-sleep! ( + 1 idletime))
+;; ;; (server:wait-for-server-start-last-flag areapath)))))))
+;; ;;
+;; ;; ;; oldest server alive determines host then choose random of youngest
+;; ;; ;; five servers on that host
+;; ;; ;;
+;; ;; (define (server:get-servers-info areapath)
+;; ;; ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
+;; ;; (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
+;; ;; (if (not (file-exists? servinfodir))
+;; ;; (create-directory servinfodir))
+;; ;; (let* ((allfiles (glob (conc servinfodir"/*")))
+;; ;; (res (make-hash-table)))
+;; ;; (for-each
+;; ;; (lambda (f)
+;; ;; (let* ((hostport (pathname-strip-directory f))
+;; ;; (serverdat (server:logf-get-start-info f)))
+;; ;; (match serverdat
+;; ;; ((host port start server-id pid)
+;; ;; (if (and host port start server-id pid)
+;; ;; (hash-table-set! res hostport serverdat)
+;; ;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
+;; ;; (else
+;; ;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
+;; ;; allfiles)
+;; ;; res)))
+;; ;;
+;; ;; ;; check the .servinfo directory, are there other servers running on this
+;; ;; ;; or another host?
+;; ;; ;;
+;; ;; ;; returns #t => ok to start another server
+;; ;; ;; #f => not ok to start another server
+;; ;; ;;
+;; ;; (define (server:minimal-check areapath)
+;; ;; (server:clean-up-old areapath)
+;; ;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
+;; ;; (servrs (glob (conc srvdir"/*")))
+;; ;; (thishostip (server:get-best-guess-address (get-host-name)))
+;; ;; (thisservrs (glob (conc srvdir"/"thishostip":*")))
+;; ;; (homehostinf (server:choose-server areapath 'homehost))
+;; ;; (havehome (car homehostinf))
+;; ;; (wearehome (cdr homehostinf)))
+;; ;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
+;; ;; ", numservers: "(length thisservrs))
+;; ;; (cond
+;; ;; ((not havehome) #t) ;; no homehost yet, go for it
+;; ;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
+;; ;; ((and havehome (not wearehome)) #f) ;; we are not the home host
+;; ;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
+;; ;; (else
+;; ;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
+;; ;; #t))))
+;; ;;
+;; ;;
+;; ;; (define server-last-start 0)
+;; ;;
+;; ;;
+;; ;; ;; oldest server alive determines host then choose random of youngest
+;; ;; ;; five servers on that host
+;; ;; ;;
+;; ;; ;; mode:
+;; ;; ;; best - get best server (random of newest five)
+;; ;; ;; home - get home host based on oldest server
+;; ;; ;; info - print info
+;; ;; (define (server:choose-server areapath #!optional (mode 'best))
+;; ;; ;; age is current-starttime
+;; ;; ;; find oldest alive
+;; ;; ;; 1. sort by age ascending and ping until good
+;; ;; ;; find alive rand from youngest
+;; ;; ;; 1. sort by age descending
+;; ;; ;; 2. take five
+;; ;; ;; 3. check alive, discard if not and repeat
+;; ;; ;; first we clean up old server files
+;; ;; (server:clean-up-old areapath)
+;; ;; (let* ((since-last (- (current-seconds) server-last-start))
+;; ;; (server-start-delay 10))
+;; ;; (if ( < (- (current-seconds) server-last-start) 10 )
+;; ;; (begin
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
+;; ;; (thread-sleep! server-start-delay)
+;; ;; )
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+;; ;; )
+;; ;; )
+;; ;; (let* ((serversdat (server:get-servers-info areapath))
+;; ;; (servkeys (hash-table-keys serversdat))
+;; ;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
+;; ;; (sort servkeys ;; list of "host:port"
+;; ;; (lambda (a b)
+;; ;; (>= (list-ref (hash-table-ref serversdat a) 2)
+;; ;; (list-ref (hash-table-ref serversdat b) 2))))
+;; ;; '())))
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
+;; ;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
+;; ;; (if (not (null? by-time-asc))
+;; ;; (let* ((oldest (last by-time-asc))
+;; ;; (oldest-dat (hash-table-ref serversdat oldest))
+;; ;; (host (list-ref oldest-dat 0))
+;; ;; (all-valid (filter (lambda (x)
+;; ;; (equal? host (list-ref (hash-table-ref serversdat x) 0)))
+;; ;; by-time-asc))
+;; ;; (best-ten (lambda ()
+;; ;; (if (> (length all-valid) 11)
+;; ;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
+;; ;; (if (> (length all-valid) 8)
+;; ;; (drop-right all-valid 1)
+;; ;; all-valid))))
+;; ;; (names->dats (lambda (names)
+;; ;; (map (lambda (x)
+;; ;; (hash-table-ref serversdat x))
+;; ;; names)))
+;; ;; (am-home? (lambda ()
+;; ;; (let* ((currhost (get-host-name))
+;; ;; (bestadrs (server:get-best-guess-address currhost)))
+;; ;; (or (equal? host currhost)
+;; ;; (equal? host bestadrs))))))
+;; ;; (case mode
+;; ;; ((info)
+;; ;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+;; ;; (print "youngest: "(hash-table-ref serversdat (car all-valid))))
+;; ;; ((home) host)
+;; ;; ((homehost) (cons host (am-home?))) ;; shut up old code
+;; ;; ((home?) (am-home?))
+;; ;; ((best-ten)(names->dats (best-ten)))
+;; ;; ((all-valid)(names->dats all-valid))
+;; ;; ((best) (let* ((best-ten (best-ten))
+;; ;; (len (length best-ten)))
+;; ;; (hash-table-ref serversdat (list-ref best-ten (random len)))))
+;; ;; ((count)(length all-valid))
+;; ;; (else
+;; ;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
+;; ;; #f)))
+;; ;; (begin
+;; ;; (server:run areapath)
+;; ;; (set! server-last-start (current-seconds))
+;; ;; ;; (thread-sleep! 3)
+;; ;; (case mode
+;; ;; ((homehost) (cons #f #f))
+;; ;; (else #f))))))
+;; ;;
+;; ;; (define (server:get-servinfo-dir areapath)
+;; ;; (let* ((spath (conc areapath"/.servinfo")))
+;; ;; (if (not (file-exists? spath))
+;; ;; (create-directory spath #t))
+;; ;; spath))
+;; ;;
+;; ;; (define (server:clean-up-old areapath)
+;; ;; ;; any server file that has not been touched in ten minutes is effectively dead
+;; ;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
+;; ;; (for-each
+;; ;; (lambda (sfile)
+;; ;; (let* ((modtime (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
+;; ;; (current-seconds))
+;; ;; (file-modification-time sfile))))
+;; ;; (if (and (number? modtime)
+;; ;; (> (- (current-seconds) modtime)
+;; ;; 600))
+;; ;; (begin
+;; ;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
+;; ;; (delete-file sfile))))))
+;; ;; sfiles)))
+;; ;;
+;; ;; ;; would like to eventually get rid of this
+;; ;; ;;
+;; ;; (define (common:on-homehost?)
+;; ;; (server:choose-server *toppath* 'home?))
+;; ;;
+;; ;; ;; kind start up of server, wait before allowing another server for a given
+;; ;; ;; area to be launched
+;; ;; ;;
+;; ;; (define (server:kind-run areapath)
+;; ;; ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
+;; ;; ;; and wait for it to be at least seconds old
+;; ;; ;; (server:wait-for-server-start-last-flag areapath)
+;; ;; (let loop ()
+;; ;; (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
+;; ;; (begin
+;; ;; (if (common:low-noise-print 30 "our-host-load")
+;; ;; (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
+;; ;; (loop))))
+;; ;; (if (< (server:choose-server areapath 'count) 20)
+;; ;; (server:run areapath))
+;; ;; #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+;; ;; (let* ((lock-file (conc areapath "/logs/server-start.lock")))
+;; ;; (let* ((start-flag (conc areapath "/logs/server-start-last")))
+;; ;; (common:simple-file-lock-and-wait lock-file expire-time: 25)
+;; ;; (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
+;; ;; (system (conc "touch " start-flag)) ;; lazy but safe
+;; ;; (server:run areapath)
+;; ;; (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
+;; ;; (common:simple-file-release-lock lock-file)))
+;; ;; (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
+;; ;;
+;; ;; ;; this one seems to be the general entry point
+;; ;; ;;
+;; ;; (define (server:start-and-wait areapath #!key (timeout 60))
+;; ;; (let ((give-up-time (+ (current-seconds) timeout)))
+;; ;; (let loop ((server-info (server:check-if-running areapath))
+;; ;; (try-num 0))
+;; ;; (if (or server-info
+;; ;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
+;; ;; (server:record->url server-info)
+;; ;; (let* ( (servers (server:choose-server areapath 'all-valid))
+;; ;; (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
+;; ;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again
+;; ;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
+;; ;; (server:run areapath))
+;; ;; (thread-sleep! 5)
+;; ;; (loop (server:check-if-running areapath)
+;; ;; (+ try-num 1)))))))
+;; ;;
+;; ;; (define (server:get-num-servers #!key (numservers 2))
+;; ;; (let ((ns (string->number
+;; ;; (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
+;; ;; (or ns numservers)))
+;; ;;
+;; ;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;; ;; ;;
+;; ;; (define (server:check-if-running areapath) ;; #!key (numservers "2"))
+;; ;; (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
+;; ;; (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
+;; ;; (if (or (and servers
+;; ;; (null? servers))
+;; ;; (not servers))
+;; ;; ;; (and (list? servers)
+;; ;; ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
+;; ;; #f
+;; ;; (let loop ((hed (car servers))
+;; ;; (tal (cdr servers)))
+;; ;; (let ((res (server:check-server hed)))
+;; ;; (if res
+;; ;; hed
+;; ;; (if (null? tal)
+;; ;; #f
+;; ;; (loop (car tal)(cdr tal)))))))))
+;; ;;
+;; ;; ;; ping the given server
+;; ;; ;;
+;; ;; (define (server:check-server server-record)
+;; ;; (let* ((server-url (server:record->url server-record))
+;; ;; (server-id (server:record->id server-record))
+;; ;; (res (server:ping server-url server-id)))
+;; ;; (if res
+;; ;; server-url
+;; ;; #f)))
+;; ;;
+;; ;; (define (server:kill servr)
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; (begin
+;; ;; (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
+;; ;; #f)
+;; ;; (match-let (((mod-time hostname port start-time server-id pid)
+;; ;; servr))
+;; ;; (tasks:kill-server hostname pid))))
+;; ;;
+;; ;; ;; called in megatest.scm, host-port is string hostname:port
+;; ;; ;;
+;; ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;; ;; ;; in the same process as the server.
+;; ;; ;;
+;; ;; (define (server:ping host:port server-id #!key (do-exit #f))
+;; ;; (let* ((host-port (cond
+;; ;; ((string? host:port)
+;; ;; (let ((slst (string-split host:port ":")))
+;; ;; (if (eq? (length slst) 2)
+;; ;; (list (car slst)(string->number (cadr slst)))
+;; ;; #f)))
+;; ;; (else
+;; ;; #f))))
+;; ;; (cond
+;; ;; ((and (list? host-port)
+;; ;; (eq? (length host-port) 2))
+;; ;; (let* ((myrunremote (make-remote))
+;; ;; (iface (car host-port))
+;; ;; (port (cadr host-port))
+;; ;; (server-dat (client:connect iface port server-id myrunremote))
+;; ;; (login-res (rmt:login-no-auto-client-setup myrunremote)))
+;; ;; (if (and (list? login-res)
+;; ;; (car login-res))
+;; ;; (begin
+;; ;; ;; (print "LOGIN_OK")
+;; ;; (if do-exit (exit 0))
+;; ;; #t)
+;; ;; (begin
+;; ;; ;; (print "LOGIN_FAILED")
+;; ;; (if do-exit (exit 1))
+;; ;; #f))))
+;; ;; (else
+;; ;; (if host:port
+;; ;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
+;; ;; (if do-exit
+;; ;; (exit 1)
+;; ;; #f)))))
+;; ;;
+;; ;; ;; run ping in separate process, safest way in some cases
+;; ;; ;;
+;; ;; (define (server:ping-server ifaceport)
+;; ;; (with-input-from-pipe
+;; ;; (conc (common:get-megatest-exe) " -ping " ifaceport)
+;; ;; (lambda ()
+;; ;; (let loop ((inl (read-line))
+;; ;; (res "NOREPLY"))
+;; ;; (if (eof-object? inl)
+;; ;; (case (string->symbol res)
+;; ;; ((NOREPLY) #f)
+;; ;; ((LOGIN_OK) #t)
+;; ;; (else #f))
+;; ;; (loop (read-line) inl))))))
+;; ;;
+;; ;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;; ;; ;;
+;; ;; (define (server:login toppath)
+;; ;; (lambda (toppath)
+;; ;; (set! *db-last-access* (current-seconds)) ;; might not be needed.
+;; ;; (if (equal? *toppath* toppath)
+;; ;; #t
+;; ;; #f)))
+;; ;;
+;; ;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute
+;; ;; ;; This is currently broken. Just use the number of hours with no unit.
+;; ;; ;; Default is 60 seconds.
+;; ;; ;;
+;; ;; (define (server:expiration-timeout)
+;; ;; (let ((tmo (configf:lookup *configdat* "server" "timeout")))
+;; ;; (if (and (string? tmo)
+;; ;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
+;; ;; (* 3600 (string->number tmo))
+;; ;; 600)))
+;; ;;
+;; ;; (define (server:get-best-guess-address hostname)
+;; ;; (let ((res #f))
+;; ;; (for-each
+;; ;; (lambda (adr)
+;; ;; (if (not (eq? (u8vector-ref adr 0) 127))
+;; ;; (set! res adr)))
+;; ;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+;; ;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+;; ;; (string-intersperse
+;; ;; (map number->string
+;; ;; (u8vector->list
+;; ;; (if res res (hostname->ip hostname)))) ".")))
+;; ;;
+;; ;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; ;; ;; (define (server:release-sync-lock)
+;; ;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; ;; ;; (define (server:have-sync-lock?)
+;; ;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;; ;; ;; (have-lock? (car have-lock-pair))
+;; ;; ;; (lock-time (cdr have-lock-pair))
+;; ;; ;; (lock-age (- (current-seconds) lock-time)))
+;; ;; ;; (cond
+;; ;; ;; (have-lock? #t)
+;; ;; ;; ((>lock-age
+;; ;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;; ;; ;; (server:release-sync-lock)
+;; ;; ;; (server:have-sync-lock?))
+;; ;; ;; (else #f))))
+;; ;;
+;; ;; ;; moving this here as it needs access to db and cannot be in common.
+;; ;; ;;
+;; ;;
+;; ;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
+;; ;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
+;; ;; (lambda ()
+;; ;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))
+;; ;; #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
+;; ;; (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
+;; ;; (tmp-area (common:get-db-tmp-area))
+;; ;; (tmp-db (conc tmp-area "/megatest.db"))
+;; ;; (staging-file (conc *toppath* "/.megatest.db"))
+;; ;; (mtdbfile (conc *toppath* "/megatest.db"))
+;; ;; (lockfile (common:get-sync-lock-filepath))
+;; ;; (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
+;; ;; (sync-cmd (if fork-to-background
+;; ;; (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
+;; ;; sync-cmd-core))
+;; ;; (default-min-intersync-delay 2)
+;; ;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
+;; ;; (default-duty-cycle 0.1)
+;; ;; (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
+;; ;; (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
+;; ;; (calculate-off-time (lambda (work-duration duty-cycle)
+;; ;; (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
+;; ;; (off-time min-intersync-delay) ;; adjusted in closure below.
+;; ;; (do-a-sync
+;; ;; (lambda ()
+;; ;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
+;; ;; (let* ((finalres
+;; ;; (let retry-loop ((num-tries 0))
+;; ;; (if (common:simple-file-lock lockfile)
+;; ;; (begin
+;; ;; (cond
+;; ;; ((not (or fork-to-background persist-until-sync))
+;; ;; (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
+;; ;; " , off-time="off-time" seconds ]")
+;; ;; (thread-sleep! (max off-time min-intersync-delay)))
+;; ;; (else
+;; ;; (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
+;; ;;
+;; ;; (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
+;; ;; (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
+;; ;; (delete-file* staging-file)
+;; ;; (let* ((start-time (current-milliseconds))
+;; ;; (res (system sync-cmd))
+;; ;; (dbbackupfile (conc mtdbfile ".backup"))
+;; ;; (res2
+;; ;; (cond
+;; ;; ((eq? 0 res )
+;; ;; (handle-exceptions
+;; ;; exn
+;; ;; #f
+;; ;; (if (file-exists? dbbackupfile)
+;; ;; (delete-file* dbbackupfile)
+;; ;; )
+;; ;; (if (eq? 0 (file-size sync-log))
+;; ;; (delete-file* sync-log))
+;; ;; (system (conc "/bin/mv " staging-file " " mtdbfile))
+;; ;;
+;; ;; (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
+;; ;; (set! off-time (calculate-off-time
+;; ;; last-sync-seconds
+;; ;; (cond
+;; ;; ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
+;; ;; duty-cycle)
+;; ;; (else
+;; ;; (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
+;; ;; default-duty-cycle))))
+;; ;;
+;; ;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
+;; ;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
+;; ;; 'sync-completed))
+;; ;; (else
+;; ;; (system (conc "/bin/cp "sync-log" "sync-log".fail"))
+;; ;; (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
+;; ;; (if (file-exists? (conc mtdbfile ".backup"))
+;; ;; (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
+;; ;; #f))))
+;; ;; (common:simple-file-release-lock lockfile)
+;; ;; (BB> "released lockfile: " lockfile)
+;; ;; (when (common:file-exists? lockfile)
+;; ;; (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
+;; ;; res2) ;; end let
+;; ;; );; end begin
+;; ;; ;; else
+;; ;; (cond
+;; ;; (persist-until-sync
+;; ;; (thread-sleep! 1)
+;; ;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
+;; ;; (retry-loop (add1 num-tries)))
+;; ;; (else
+;; ;; (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
+;; ;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
+;; ;; 'parallel-sync-in-progress))
+;; ;; ) ;; end if got lockfile
+;; ;; )
+;; ;; ))
+;; ;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
+;; ;; finalres)
+;; ;; ) ;; end lambda
+;; ;; ))
+;; ;; do-a-sync))
+;; ;;
+;; ;;
+
+)
Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -22,11 +22,11 @@
(import (prefix sqlite3 sqlite3:))
(declare (unit tasks))
(declare (uses dbfile))
(declare (uses db))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses common))
(declare (uses pgdb))
(import dbfile)
;; (import pgdb) ;; pgdb is a module
Index: tcmt.scm
==================================================================
--- tcmt.scm
+++ tcmt.scm
@@ -27,11 +27,11 @@
(use trace)
;; (trace-call-sites #t)
(declare (uses margs))
-(declare (uses rmt))
+(declare (uses rmtmod))
(declare (uses common))
;; (declare (uses megatest-version))
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
Index: tdb.scm
==================================================================
--- tdb.scm
+++ tdb.scm
@@ -29,11 +29,11 @@
(declare (unit tdb))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
-(declare (uses client))
+(declare (uses clientmod))
(declare (uses mt))
(declare (uses db))
(include "common_records.scm")
(include "db_records.scm")
Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -30,11 +30,11 @@
(declare (uses commonmod))
;; (declare (uses dcommon)) ;; needed for the steps processing
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))
-(declare (uses server))
+(declare (uses servermod))
;;(declare (uses stml2))
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
(import (prefix sqlite3 sqlite3:))
(import commonmod)
Index: tree.scm
==================================================================
--- tree.scm
+++ tree.scm
@@ -30,11 +30,11 @@
(declare (uses margs))
(declare (uses launch))
;; (declare (uses megatest-version))
(declare (uses gutils))
(declare (uses db))
-(declare (uses server))
+;; (declare (uses server))
;; (declare (uses synchash))
(declare (uses dcommon))
(include "megatest-version.scm")
(include "common_records.scm")
ADDED ulex/dbmgr.scm
Index: ulex/dbmgr.scm
==================================================================
--- /dev/null
+++ ulex/dbmgr.scm
@@ -0,0 +1,1131 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+
+(declare (unit dbmgrmod))
+
+(declare (uses ulex))
+(declare (uses apimod))
+(declare (uses pkts))
+(declare (uses commonmod))
+(declare (uses dbmod))
+(declare (uses mtargs))
+(declare (uses portloggermod))
+(declare (uses debugprint))
+
+(module dbmgrmod
+ *
+
+(import scheme
+ chicken.base
+ chicken.condition
+ chicken.file
+ chicken.format
+ chicken.port
+ chicken.process
+ chicken.process-context
+ chicken.process-context.posix
+ chicken.sort
+ chicken.string
+ chicken.time
+
+ (prefix sqlite3 sqlite3:)
+ matchable
+ md5
+ message-digest
+ regex
+ s11n
+ srfi-1
+ srfi-18
+ srfi-69
+ system-information
+ typed-records
+
+ pkts
+ ulex
+
+ commonmod
+ apimod
+ dbmod
+ debugprint
+ (prefix mtargs args:)
+ portloggermod
+ )
+
+;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048)
+
+;; info about me as a listener and my connections to db servers
+;; stored (for now) in *db-serv-info*
+;;
+(defstruct servdat
+ (host #f)
+ (port #f)
+ (uuid #f)
+ (dbfile #f)
+ (uconn #f) ;; this is the listener *FOR THIS PROCESS*
+ (mode #f)
+ (status 'starting)
+ (trynum 0) ;; count the number of ports we've tried
+ (conns (make-hash-table)) ;; apath/dbname => conndat
+ )
+
+(define *db-serv-info* (make-servdat))
+
+(define (servdat->url sdat)
+ (conc (servdat-host sdat)":"(servdat-port sdat)))
+
+;; db servers contact info
+;;
+(defstruct conndat
+ (apath #f)
+ (dbname #f)
+ (fullname #f)
+ (hostport #f)
+ (ipaddr #f)
+ (port #f)
+ (srvpkt #f)
+ (srvkey #f)
+ (lastmsg 0)
+ (expires 0))
+
+(define *srvpktspec*
+ `((server (host . h)
+ (port . p)
+ (servkey . k)
+ (pid . i)
+ (ipaddr . a)
+ (dbpath . d))))
+
+;;======================================================================
+;; S U P P O R T F U N C T I O N S
+;;======================================================================
+
+;; set up the api proc, seems like there should be a better place for this?
+;;
+;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE
+;;
+;; (define api-proc (make-parameter conc))
+;; (api-proc api:execute-requests)
+
+;; do we have a connection to apath dbname and
+;; is it not expired? then return it
+;;
+;; else setup a connection
+;;
+;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
+;;
+(define (rmt:get-conn remdat apath dbname)
+ (let* ((fullname (db:dbname->path apath dbname)))
+ (hash-table-ref/default (servdat-conns remdat) fullname #f)))
+
+(define (rmt:drop-conn remdat apath dbname)
+ (let* ((fullname (db:dbname->path apath dbname)))
+ (hash-table-delete! (servdat-conns remdat) fullname)))
+
+(define (rmt:find-main-server uconn apath dbname)
+ (let* ((pktsdir (get-pkts-dir apath))
+ (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
+ (viable-srvs (get-viable-servers all-srvpkts dbname)))
+ (get-the-server uconn apath viable-srvs)))
+
+
+(define *connstart-mutex* (make-mutex))
+(define *last-main-start* 0)
+
+;; looks for a connection to main, returns if have and not exired
+;; creates new otherwise
+;;
+;; connections for other servers happens by requesting from main
+;;
+;; TODO: This is unnecessarily re-creating the record in the hash table
+;;
+(define (rmt:open-main-connection remdat apath)
+ (let* ((fullpath (db:dbname->path apath ".db/main.db"))
+ (conns (servdat-conns remdat))
+ (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this
+ (start-rmt:run (lambda ()
+ (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server")))
+ (thread-start! th1)
+ (thread-sleep! 1)
+ (let loop ((count 0))
+ (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection")
+ (if (or (not *db-serv-info*)
+ (not (servdat-uconn *db-serv-info*)))
+ (begin
+ (thread-sleep! 1)
+ (loop (+ count 1)))
+ (begin
+ (servdat-mode-set! *db-serv-info* 'non-db)
+ (servdat-uconn *db-serv-info*)))))))
+ (myconn (servdat-uconn *db-serv-info*)))
+ (cond
+ ((not myconn)
+ (start-rmt:run)
+ (rmt:open-main-connection remdat apath))
+ ((and conn ;; conn is NOT a socket, just saying ...
+ (< (current-seconds) (conndat-expires conn)))
+ #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died
+ ((and conn
+ (>= (current-seconds)(conndat-expires conn)))
+ (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
+ (rmt:drop-conn remdat apath ".db/main.db") ;;
+ (rmt:open-main-connection remdat apath))
+ (else
+ ;; Below we will find or create and connect to main
+ (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch")
+ (let* ((dbname (db:run-id->dbname #f))
+ (the-srv (rmt:find-main-server myconn apath dbname))
+ (start-main-srv (lambda () ;; call IF there is no the-srv found
+ (mutex-lock! *connstart-mutex*)
+ (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server
+ (begin
+ (api:run-server-process apath dbname)
+ (set! *last-main-start* (current-seconds))
+ (thread-sleep! 1))
+ (thread-sleep! 0.25))
+ (mutex-unlock! *connstart-mutex*)
+ (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries
+ )))
+ (if (not the-srv) ;; have server, try connecting to it
+ (start-main-srv)
+ (let* ((srv-addr (server-address the-srv)) ;; need serv
+ (ipaddr (alist-ref 'ipaddr the-srv))
+ (port (alist-ref 'port the-srv))
+ (srvkey (alist-ref 'servkey the-srv))
+ (fullpath (db:dbname->path apath dbname))
+
+ (new-the-srv (make-conndat
+ apath: apath
+ dbname: dbname
+ fullname: fullpath
+ hostport: srv-addr
+ ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection?
+ ipaddr: ipaddr
+ port: port
+ srvpkt: the-srv
+ srvkey: srvkey ;; generated by rmt:get-signature on the server side
+ lastmsg: (current-seconds)
+ expires: (+ (current-seconds)
+ (server:expiration-timeout)
+ -2) ;; this needs to be gathered during the ping
+ )))
+ (hash-table-set! conns fullpath new-the-srv)))
+ #t)))))
+
+;; NB// sinfo is a servdat struct
+;;
+(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5))
+ (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
+ (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable
+ (fullname (db:dbname->path apath dbname))
+ (conns (servdat-conns sinfo))
+ (mconn (rmt:get-conn sinfo apath ".db/main.db"))
+ (dconn (rmt:get-conn sinfo apath dbname)))
+ #;(if (and mconn
+ (not (debug:print-logger)))
+ (begin
+ (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
+ (debug:print-logger rmt:log-to-main)))
+ (cond
+ ((and mconn
+ dconn
+ (< (current-seconds)(conndat-expires dconn)))
+ #t) ;; good to go
+ ((not mconn) ;; no channel open to main? open it...
+ (rmt:open-main-connection sinfo apath)
+ (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+ ((not dconn) ;; no channel open to dbname?
+ (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname))))
+ (case res
+ ((server-started)
+ (if (> num-tries 0)
+ (begin
+ (thread-sleep! 2)
+ (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+ (begin
+ (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname)
+ (exit 1))))
+ (else
+ (if (list? res) ;; server has been registered and the info was returned. pass it on.
+ (begin ;; ("192.168.0.9" 53817
+ ;; "5e34239f48e8973b3813221e54701a01" "24310"
+ ;; "192.168.0.9"
+ ;; "/home/matt/data/megatest/tests/simplerun"
+ ;; ".db/1.db")
+ (match
+ res
+ ((host port servkey pid ipaddr apath dbname)
+ (debug:print-info 0 *default-log-port* "got "res)
+ (hash-table-set! conns
+ fullname
+ (make-conndat
+ apath: apath
+ dbname: dbname
+ hostport: (conc host":"port)
+ ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection?
+ ipaddr: ipaddr
+ port: port
+ srvkey: servkey
+ lastmsg: (current-seconds)
+ expires: (+ (current-seconds)
+ (server:expiration-timeout)
+ -2))))
+ (else
+ (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res)))
+ res)
+ (begin
+ (debug:print-info 0 *default-log-port* "Unexpected result: " res)
+ res)))))))
+ #t))
+
+;;======================================================================
+
+;; FOR DEBUGGING SET TO #t
+;; (define *localmode* #t)
+(define *localmode* #f)
+(define *dbstruct* (make-dbr:dbstruct))
+
+;; Defaults to current area
+;;
+(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
+ (let* ((apath *toppath*)
+ (sinfo *db-serv-info*)
+ (dbname (db:run-id->dbname rid)))
+ (if *localmode*
+ (api:execute-requests *dbstruct* cmd params)
+ (begin
+ (rmt:open-main-connection sinfo apath)
+ (if rid (rmt:general-open-connection sinfo apath dbname))
+ #;(if (not (member cmd '(log-to-main)))
+ (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params))
+ (rmt:send-receive-real sinfo apath dbname cmd params)))))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future
+;;
+(define (rmt:send-receive-real sinfo apath dbname cmd params)
+ (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.")
+ (let* ((cdat (rmt:get-conn sinfo apath dbname)))
+ (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
+ (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex
+ ;; then send-receive using the ulex layer to host-port stored in cdat
+ (res (send-receive uconn (conndat-hostport cdat) cmd params))
+ #;(th1 (make-thread (lambda ()
+ (set! res (send-receive uconn (conndat-hostport cdat) cmd params)))
+ "send-receive thread")))
+ ;; (thread-start! th1)
+ ;; (thread-join! th1) ;; gratuitious thread stuff is so that mailbox is not used in primordial thead
+ ;; since we accessed the server we can bump the expires time up
+ (conndat-expires-set! cdat (+ (current-seconds)
+ (server:expiration-timeout)
+ -10)) ;; ten second margin for network time misalignments etc.
+ res)))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future.
+;;
+;; Purpose - call the main.db server and request a server be started
+;; for the given area path and dbname
+;;
+
+(define (rmt:print-db-stats)
+ (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+ (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
+ (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+ (for-each (lambda (cmd)
+ (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+ (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+ (sort (hash-table-keys *db-stats*)
+ (lambda (a b)
+ (> (vector-ref (hash-table-ref *db-stats* a) 0)
+ (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+(define (rmt:get-max-query-average run-id)
+ (mutex-lock! *db-stats-mutex*)
+ (let* ((runkey (conc "run-id=" run-id " "))
+ (cmds (filter (lambda (x)
+ (substring-index runkey x))
+ (hash-table-keys *db-stats*)))
+ (res (if (null? cmds)
+ (cons 'none 0)
+ (let loop ((cmd (car cmds))
+ (tal (cdr cmds))
+ (max-cmd (car cmds))
+ (res 0))
+ (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+ (tot (vector-ref cmd-dat 0))
+ (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+ (currmax (max res curravg))
+ (newmax-cmd (if (> curravg res) cmd max-cmd)))
+ (if (null? tal)
+ (if (> tot 10)
+ (cons newmax-cmd currmax)
+ (cons 'none 0))
+ (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+ (mutex-unlock! *db-stats-mutex*)
+ res))
+
+;; host and port are used to ensure we are remove proper records
+(define (rmt:server-shutdown host port)
+ (let ((dbfile (servdat-dbfile *db-serv-info*)))
+ (debug:print-info 0 *default-log-port* "dbfile is "dbfile)
+ (if dbfile
+ (let* ((am-server (args:get-arg "-server"))
+ (dbfile (args:get-arg "-db"))
+ (apath *toppath*)
+ #;(sinfo *remotedat*)) ;; foundation for future fix
+ (if *dbstruct-db*
+ (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile))
+ (db (dbr:dbdat-db dbdat))
+ (inmem (dbr:dbdat-db dbdat)) ;; WRONG
+ )
+ ;; do a final sync here
+ (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
+ (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
+ ;; let's finalize here
+ (debug:print-info 0 *default-log-port* "Finalizing db and inmem")
+ (if (sqlite3:database? db)
+ (sqlite3:finalize! db)
+ (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
+ (if (sqlite3:database? inmem)
+ (sqlite3:finalize! inmem)
+ (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
+ (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete"))
+ (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do."))
+ (if (not am-server)
+ (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
+ (if (string-match ".*/main.db$" dbfile)
+ (let ((pkt-file (conc (get-pkts-dir *toppath*)
+ "/" (servdat-uuid *db-serv-info*)
+ ".pkt")))
+ (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
+ (delete-file* pkt-file)
+ (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
+ (db:with-lock-db
+ (servdat-dbfile *db-serv-info*)
+ (lambda (dbh dbfile)
+ (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
+ (let* ((sdat *db-serv-info*) ;; we have a run-id server
+ (host (servdat-host sdat))
+ (port (servdat-port sdat))
+ (uuid (servdat-uuid sdat))
+ (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile)))
+ (debug:print-info 0 *default-log-port* "deregistered-server, res="res)
+ (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
+ )))))))
+
+
+(define (common:run-sync?)
+ ;; (and (common:on-homehost?)
+ (args:get-arg "-server"))
+
+(define *rmt:run-mutex* (make-mutex))
+(define *rmt:run-flag* #f)
+
+;; Main entry point to start a server. was start-server
+(define (rmt:run hostn)
+ (mutex-lock! *rmt:run-mutex*)
+ (if *rmt:run-flag*
+ (begin
+ (debug:print-warn 0 *default-log-port* "rmt:run already running.")
+ (mutex-unlock! *rmt:run-mutex*))
+ (begin
+ (set! *rmt:run-flag* #t)
+ (mutex-unlock! *rmt:run-mutex*)
+ ;; ;; Configurations for server
+ ;; (tcp-buffer-size 2048)
+ ;; (max-connections 2048)
+ (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...")
+ (if (and *db-serv-info*
+ (servdat-uconn *db-serv-info*))
+ (let* ((uconn (servdat-uconn *db-serv-info*)))
+ (wait-and-close uconn))
+ (let* ((port (portlogger:open-run-close portlogger:find-port))
+ (handler-proc (lambda (rem-host-port qrykey cmd params) ;;
+ (set! *db-last-access* (current-seconds))
+ (assert (list? params) "FATAL: handler called with non-list params")
+ (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params)
+ (debug:print 0 *default-log-port* "handler call: "cmd", params="params)
+ (api:execute-requests *dbstruct-db* cmd params))))
+ ;; (api:process-request *dbstuct-db*
+ (if (not *db-serv-info*)
+ (set! *db-serv-info* (make-servdat host: hostn port: port)))
+ (let* ((uconn (run-listener handler-proc port))
+ (rport (udat-port uconn))) ;; the real port
+ (servdat-host-set! *db-serv-info* hostn)
+ (servdat-port-set! *db-serv-info* rport)
+ (servdat-uconn-set! *db-serv-info* uconn)
+ (wait-and-close uconn)
+ (db:print-current-query-stats)
+ )))
+ (let* ((host (servdat-host *db-serv-info*))
+ (port (servdat-port *db-serv-info*))
+ (mode (or (servdat-mode *db-serv-info*)
+ "non-db")))
+ ;; server exit stuff here
+ ;; (rmt:server-shutdown host port) - always do in on-exit
+ ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit
+ (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting")
+ ))))
+
+;;======================================================================
+;; S E R V E R U T I L I T I E S
+;;======================================================================
+
+
+;;======================================================================
+;; NEW SERVER METHOD
+;;======================================================================
+
+;; only use for main.db - need to re-write some of this :(
+;;
+(define (get-lock-db sdat dbfile host port)
+ (assert host "FATAL: get-lock-db called with host not set.")
+ (assert port "FATAL: get-lock-db called with port not set.")
+ (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
+ (res (db:get-iam-server-lock dbh dbfile host port))
+ (uconn (servdat-uconn sdat)))
+ ;; res => list then already locked, check server is responsive
+ ;; => #t then sucessfully got the lock
+ ;; => #f reserved for future use as to indicate something went wrong
+ (match res
+ ((owner_pid owner_host owner_port event_time)
+ (if (server-ready? uconn (conc owner_host":"owner_port) "abc")
+ #f ;; locked by someone else
+ (begin ;; locked by someone dead and gone
+ (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
+ (db:steal-lock-db dbh dbfile port))))
+ (#t #t) ;; placeholder so that we don't touch res if it is #t
+ (else (set! res #f)))
+ (sqlite3:finalize! dbh)
+ res))
+
+
+(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
+ (let* ((pkt-dat `((host . ,host)
+ (port . ,port)
+ (servkey . ,servkey)
+ (pid . ,(current-process-id))
+ (ipaddr . ,ipaddr)
+ (dbpath . ,dbpath)))
+ (uuid (write-alist->pkt
+ pkts-dir
+ pkt-dat
+ pktspec: pkt-spec
+ ptype: 'server)))
+ (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
+ uuid))
+
+(define (get-pkts-dir #!optional (apath #f))
+ (let* ((effective-toppath (or *toppath* apath)))
+ (assert effective-toppath
+ "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
+ (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
+ (if (file-exists? pdir)
+ pdir
+ (begin
+ (handle-exceptions ;; this exception handler should NOT be needed but ...
+ exn
+ pdir
+ (create-directory pdir #t))
+ pdir)))))
+
+;; given a pkts dir read
+;;
+(define (get-all-server-pkts pktsdir-in pktspec)
+ (let* ((pktsdir (if (file-exists? pktsdir-in)
+ pktsdir-in
+ (begin
+ (create-directory pktsdir-in #t)
+ pktsdir-in)))
+ (all-pkt-files (glob (conc pktsdir "/*.pkt"))))
+ (map (lambda (pkt-file)
+ (read-pkt->alist pkt-file pktspec: pktspec))
+ all-pkt-files)))
+
+(define (server-address srv-pkt)
+ (conc (alist-ref 'host srv-pkt) ":"
+ (alist-ref 'port srv-pkt)))
+
+(define (server-ready? uconn host-port key) ;; server-address is host:port
+ (let* ((params `((cmd . ping)(key . ,key)))
+ (data `((cmd . ping)
+ (key . ,key)
+ (params . ,params))) ;; I don't get it.
+ (res (send-receive uconn host-port 'ping data)))
+ (if (eq? res 'ack) ;; yep, likely it is who we want on the other end
+ res
+ #f)))
+;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f))))
+
+; from the pkts return servers associated with dbpath
+;; NOTE: Only one can be alive - have to check on each
+;; in the list of pkts returned
+;;
+(define (get-viable-servers serv-pkts dbpath)
+ (let loop ((tail serv-pkts)
+ (res '()))
+ (if (null? tail)
+ res ;; NOTE: sort by age so oldest is considered first
+ (let* ((spkt (car tail)))
+ (loop (cdr tail)
+ (if (equal? dbpath (alist-ref 'dbpath spkt))
+ (cons spkt res)
+ res))))))
+
+(define (remove-pkts-if-not-alive uconn serv-pkts)
+ (filter (lambda (pkt)
+ (let* ((host (alist-ref 'host pkt))
+ (port (alist-ref 'port pkt))
+ (host-port (conc host":"port))
+ (key (alist-ref 'servkey pkt))
+ (pktz (alist-ref 'Z pkt))
+ (res (server-ready? uconn host-port key)))
+ (if res
+ res
+ (let* ((pktsdir (get-pkts-dir *toppath*))
+ (pktpath (conc pktsdir"/"pktz".pkt")))
+ (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
+ (delete-file* pktpath)
+ #f))))
+ serv-pkts))
+
+;; from viable servers get one that is alive and ready
+;;
+(define (get-the-server uconn apath serv-pkts)
+ (let loop ((tail serv-pkts))
+ (if (null? tail)
+ #f
+ (let* ((spkt (car tail))
+ (host (alist-ref 'ipaddr spkt))
+ (port (alist-ref 'port spkt))
+ (host-port (conc host":"port))
+ (dbpth (alist-ref 'dbpath spkt))
+ (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt))
+ (addr (server-address spkt)))
+ (if (server-ready? uconn host-port srvkey)
+ spkt
+ (loop (cdr tail)))))))
+
+;; am I the "first" in line server? I.e. my D card is smallest
+;; use Z card as tie breaker
+;;
+(define (get-best-candidate serv-pkts dbpath)
+ (if (null? serv-pkts)
+ #f
+ (let loop ((tail serv-pkts)
+ (best (car serv-pkts)))
+ (if (null? tail)
+ best
+ (let* ((candidate (car tail))
+ (candidate-bd (string->number (alist-ref 'D candidate)))
+ (best-bd (string->number (alist-ref 'D best)))
+ ;; bigger number is younger
+ (candidate-z (alist-ref 'Z candidate))
+ (best-z (alist-ref 'Z best))
+ (new-best (cond
+ ((> best-bd candidate-bd) ;; best is younger than candidate
+ candidate)
+ ((< best-bd candidate-bd) ;; candidate is younger than best
+ best)
+ (else
+ (if (string>=? best-z candidate-z)
+ best
+ candidate))))) ;; use Z card as tie breaker
+ (if (null? tail)
+ new-best
+ (loop (cdr tail) new-best)))))))
+
+
+;;======================================================================
+;; END NEW SERVER METHOD
+;;======================================================================
+
+;; if .db/main.db check the pkts
+;;
+(define (rmt:wait-for-server pkts-dir db-file server-key)
+ (let* ((sdat *db-serv-info*))
+ (let loop ((start-time (current-seconds))
+ (changed #t)
+ (last-sdat "not this"))
+ (begin ;; let ((sdat #f))
+ (thread-sleep! 0.01)
+ (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+ (mutex-lock! *heartbeat-mutex*)
+ (set! sdat *db-serv-info*)
+ (mutex-unlock! *heartbeat-mutex*)
+ (if (and sdat
+ (not changed)
+ (> (- (current-seconds) start-time) 2))
+ (let* ((uconn (servdat-uconn sdat)))
+ (servdat-status-set! sdat 'iface-stable)
+ (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
+ ;; create a server pkt in *toppath*/.meta/srvpkts
+
+ ;; TODO:
+ ;; 1. change sdat to stuct
+ ;; 2. add uuid to struct
+ ;; 3. update uuid in sdat here
+ ;;
+ (servdat-uuid-set! sdat
+ (register-server
+ pkts-dir *srvpktspec*
+ (get-host-name)
+ (servdat-port sdat) server-key
+ (servdat-host sdat) db-file))
+ ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
+ ;; now read pkts and see if we are a contender
+ (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*))
+ (viables (get-viable-servers all-pkts db-file))
+ (alive (remove-pkts-if-not-alive uconn viables))
+ (best-srv (get-best-candidate alive db-file))
+ (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
+ (i-am-srv (equal? best-srv-key server-key))
+ (delete-pkt (lambda ()
+ (let* ((pktfile (conc (get-pkts-dir *toppath*)
+ "/" (servdat-uuid *db-serv-info*)
+ ".pkt")))
+ (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
+ (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
+ (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
+ ;; am I the best-srv, compare server-keys to know
+ (if i-am-srv
+ (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
+ (begin
+ (debug:print-info 0 *default-log-port* "I'm the server!")
+ (servdat-dbfile-set! sdat db-file)
+ (servdat-status-set! sdat 'db-locked))
+ (begin
+ (debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
+ (bdat-time-to-exit-set! *bdat* #t)
+ (delete-pkt)
+ (thread-sleep! 0.2)
+ (exit)))
+ (begin
+ (debug:print-info 0 *default-log-port*
+ "Keys do not match "best-srv-key", "server-key", exiting.")
+ (bdat-time-to-exit-set! *bdat* #t)
+ (delete-pkt)
+ (thread-sleep! 0.2)
+ (exit)))
+ sdat))
+ (begin ;; sdat not yet contains server info
+ (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+ (sleep 4)
+ (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+ (begin
+ (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+ (exit))
+ (loop start-time
+ (equal? sdat last-sdat)
+ sdat))))))))
+
+(define (rmt:register-server sinfo apath iface port server-key dbname)
+ (servdat-conns sinfo) ;; just checking types
+ (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+ (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath
+ (db:run-id->dbname #f)
+ 'register-server `(,iface
+ ,port
+ ,server-key
+ ,(current-process-id)
+ ,iface
+ ,apath
+ ,dbname)))
+
+(define (rmt:get-count-servers sinfo apath)
+ (servdat-conns sinfo) ;; just checking types
+ (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+ (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath
+ (db:run-id->dbname #f)
+ 'get-count-servers `(,apath)))
+
+(define (rmt:get-servers-info apath)
+ (rmt:send-receive 'get-servers-info #f `(,apath)))
+
+(define (rmt:deregister-server db-serv-info apath iface port server-key dbname)
+ (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db
+ (rmt:send-receive-real db-serv-info apath ;; params: host port servkey pid ipaddr dbpath
+ (db:run-id->dbname #f)
+ 'deregister-server `(,iface
+ ,port
+ ,server-key
+ ,(current-process-id)
+ ,iface
+ ,apath
+ ,dbname)))
+
+(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100))
+ ;; wait until *db-serv-info* stops changing
+ (let* ((stime (current-seconds)))
+ (let loop ((last-host #f)
+ (last-port #f)
+ (tries 0))
+ (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*)))
+ (curr-port (and *db-serv-info* (servdat-port *db-serv-info*))))
+ ;; first we verify port and interface, update *db-serv-info* in need be.
+ (cond
+ ((> tries num-tries-allowed)
+ (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.")
+ (exit 1))
+ ((not *db-serv-info*)
+ (thread-sleep! 0.25)
+ (loop curr-host curr-port (+ tries 1)))
+ ((or (not last-host)(not last-port))
+ (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries)
+ (thread-sleep! 0.25)
+ (loop curr-host curr-port (+ tries 1)))
+ ((or (not (equal? last-host curr-host))
+ (not (equal? last-port curr-port)))
+ (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+ (thread-sleep! 0.25)
+ (loop curr-host curr-port (+ tries 1)))
+ ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed
+ (thread-sleep! 0.5)
+ (loop curr-host curr-port (+ tries 1)))
+ (else
+ (rmt:get-signature) ;; sets *my-signature* as side effect
+ (servdat-status-set! *db-serv-info* 'interface-stable)
+ (debug:print 0 *default-log-port*
+ "SERVER STARTED: " curr-host
+ ":" curr-port
+ " AT " (current-seconds) " server signature: " *my-signature*
+ " with "(servdat-trynum *db-serv-info*)" port changes")
+ (flush-output *default-log-port*)
+ #t))))))
+
+;; run rmt:keep-running in a parallel thread to monitor that the db is being
+;; used and to shutdown after sometime if it is not.
+;;
+(define (rmt:keep-running dbname)
+ ;; if none running or if > 20 seconds since
+ ;; server last used then start shutdown
+ ;; This thread waits for the server to come alive
+ (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+
+ (let* ((sinfo *db-serv-info*)
+ (server-start-time (current-seconds))
+ (pkts-dir (get-pkts-dir))
+ (server-key (rmt:get-signature)) ;; This servers key
+ (is-main (equal? (args:get-arg "-db") ".db/main.db"))
+ (last-access 0)
+ (server-timeout (server:expiration-timeout))
+ (shutdown-server-sequence (lambda (host port)
+ (set! *unclean-shutdown* #f) ;; Should not be needed anymore
+ (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+ ;; (rmt:server-shutdown host port) -- called in on-exit
+ ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit
+ (exit)))
+ (timed-out? (lambda ()
+ (<= (+ last-access server-timeout)
+ (current-seconds)))))
+ (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db"))
+ ;; main and run db servers have both got wait logic (could/should merge it)
+ (if is-main
+ (rmt:wait-for-server pkts-dir dbname server-key)
+ (rmt:wait-for-stable-interface))
+ ;; this is our forever loop
+ (let* ((iface (servdat-host *db-serv-info*))
+ (port (servdat-port *db-serv-info*))
+ (uconn (servdat-uconn *db-serv-info*)))
+ (let loop ((count 0)
+ (bad-sync-count 0)
+ (start-time (current-milliseconds)))
+ (if (and (not is-main)
+ (common:low-noise-print 60 "servdat-status"))
+ (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*)))
+
+ (mutex-lock! *heartbeat-mutex*)
+ ;; set up the database handle
+ (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
+ (let ((watchdog (bdat-watchdog *bdat*)))
+ (debug:print 0 *default-log-port* "SERVER: dbprep")
+ (db:setup dbname) ;; sets *dbstruct-db* as side effect
+ (servdat-status-set! *db-serv-info* 'db-opened)
+ ;; IFF I'm not main, call into main and register self
+ (if (not is-main)
+ (let ((res (rmt:register-server sinfo
+ *toppath* iface port
+ server-key dbname)))
+ (if res ;; we are the server
+ (servdat-status-set! *db-serv-info* 'have-interface-and-db)
+ ;; now check that the db locker is alive, clear it out if not
+ (let* ((serv-info (rmt:server-info *toppath* dbname)))
+ (match serv-info
+ ((host port servkey pid ipaddr apath dbpath)
+ (if (not (server-ready? uconn (conc host":"port) servkey))
+ (begin
+ (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
+ (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
+ (loop (+ count 1) bad-sync-count start-time))))
+ (else
+ (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
+ (exit)))))))
+ (debug:print 0 *default-log-port*
+ "SERVER: running, db "dbname" opened, megatest version: "
+ (common:get-full-version))
+ ;; start the watchdog
+
+ ;; is this really needed?
+
+ #;(if watchdog
+ (if (not (member (thread-state watchdog)
+ '(ready running blocked
+ sleeping dead)))
+ (begin
+ (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
+ (thread-start! watchdog))
+ (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
+ (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
+ #;(loop (+ count 1) bad-sync-count start-time)
+ ))
+
+ (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
+
+ (mutex-unlock! *heartbeat-mutex*)
+
+ ;; when things go wrong we don't want to be doing the various
+ ;; queries too often so we strive to run this stuff only every
+ ;; four seconds or so.
+ (let* ((sync-time (- (current-milliseconds) start-time))
+ (rem-time (quotient (- 4000 sync-time) 1000)))
+ (if (and (<= rem-time 4)
+ (> rem-time 0))
+ (thread-sleep! rem-time)))
+
+ ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+ (set! last-access *db-last-access*)
+
+ (if (< count 1) ;; 3x3 = 9 secs aprox
+ (loop (+ count 1) bad-sync-count (current-milliseconds)))
+
+ (if (common:low-noise-print 60 "dbstats")
+ (begin
+ (debug:print 0 *default-log-port* "Server stats:")
+ (db:print-current-query-stats)))
+ (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
+ (cond
+ ((not *server-run*)
+ (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
+ (shutdown-server-sequence (get-host-name) port))
+ ((timed-out?)
+ (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+ (shutdown-server-sequence (get-host-name) port))
+ ((and *server-run*
+ (or (not (timed-out?))
+ (if is-main ;; do not exit if there are other servers (keep main open until all others gone)
+ (> (rmt:get-count-servers sinfo *toppath*) 1)
+ #f)))
+ (if (common:low-noise-print 120 "server continuing")
+ (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
+ (loop 0 bad-sync-count (current-milliseconds)))
+ (else
+ (set! *unclean-shutdown* #f)
+ (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+ (shutdown-server-sequence (get-host-name) port)
+ #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
+ (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown
+ (sexpr->string 'quit))))))))))
+
+(define (rmt:get-reasonable-hostname)
+ (let* ((inhost (or (args:get-arg "-server") "-")))
+ (if (equal? inhost "-")
+ (get-host-name)
+ inhost)))
+
+;; Call this to start the actual server
+;;
+;; all routes though here end in exit ...
+;;
+;; This is the point at which servers are started
+;;
+(define (rmt:server-launch dbname)
+ (debug:print-info 0 *default-log-port* "Entered rmt:server-launch")
+ (let* ((th2 (make-thread (lambda ()
+ (debug:print-info 0 *default-log-port* "Server run thread started")
+ (rmt:run (rmt:get-reasonable-hostname)))
+ "Server run"))
+ (th3 (make-thread (lambda ()
+ (debug:print-info 0 *default-log-port* "Server monitor thread started")
+ (if (args:get-arg "-server")
+ (rmt:keep-running dbname)))
+ "Keep running")))
+ (thread-start! th2)
+ (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
+ (thread-start! th3)
+ (set! *didsomething* #t)
+ (thread-join! th2)
+ (thread-join! th3))
+ #f)
+
+;;======================================================================
+;; S E R V E R - D I R E C T C A L L S
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+ (rmt:send-receive 'kill-server #f (list run-id)))
+
+(define (rmt:start-server run-id)
+ (rmt:send-receive 'start-server #f (list run-id)))
+
+(define (rmt:server-info apath dbname)
+ (rmt:send-receive 'get-server-info #f (list apath dbname)))
+
+;;======================================================================
+;; Nanomsg transport
+;;======================================================================
+
+#;(define (is-port-in-use port-num)
+ (let* ((ret #f))
+ (let-values (((inp oup pid)
+ (process "netstat" (list "-tulpn" ))))
+ (let loop ((inl (read-line inp)))
+ (if (not (eof-object? inl))
+ (begin
+ (if (string-search (regexp (conc ":" port-num)) inl)
+ (begin
+ ;(print "Output: " inl)
+ (set! ret #t))
+ (loop (read-line inp)))))))
+ ret))
+
+#;(define (open-nn-connection host-port)
+ (let ((req (make-req-socket))
+ (uri (conc "tcp://" host-port)))
+ (nng-dial req uri)
+ (socket-set! req 'nng/recvtimeo 2000)
+ req))
+
+#;(define (send-receive-nn req msg)
+ (nng-send req msg)
+ (nng-recv req))
+
+#;(define (close-nn-connection req)
+ (nng-close! req))
+
+;; ;; open connection to server, send message, close connection
+;; ;;
+;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+;; (let ((req (make-req-socket 'req))
+;; (uri (conc "tcp://" host-port))
+;; (res #f)
+;; ;; (contacts (alist-ref 'contact attrib))
+;; ;; (mode (alist-ref 'mode attrib))
+;; )
+;; (socket-set! req 'nng/recvtimeo 2000)
+;; (handle-exceptions
+;; exn
+;; (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+;; ;; Send notification
+;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" )
+;; #f)
+;; (nng-dial req uri)
+;; ;; (print "Connected to the server " )
+;; (nng-send req msg)
+;; ;; (print "Request Sent")
+;; (let* ((th1 (make-thread (lambda ()
+;; (let ((resp (nng-recv req)))
+;; (nng-close! req)
+;; (set! res (if (equal? resp "ok")
+;; #t
+;; #f))))
+;; "recv thread"))
+;; (th2 (make-thread (lambda ()
+;; (thread-sleep! timeout)
+;; (thread-terminate! th1))
+;; "timer thread")))
+;; (thread-start! th1)
+;; (thread-start! th2)
+;; (thread-join! th1)
+;; res))))
+;;
+#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+ (let ((req (make-req-socket))
+ (uri (conc "tcp://" host-port))
+ (res #f))
+ (handle-exceptions
+ exn
+ (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+ ;; Send notification
+ (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn)
+ #f)
+ (nng-dial req uri)
+ (nng-send req msg)
+ (let* ((th1 (make-thread (lambda ()
+ (let ((resp (nng-recv req)))
+ (nng-close! req)
+ ;; (print resp)
+ (set! res resp)))
+ "recv thread"))
+ (th2 (make-thread (lambda ()
+ (thread-sleep! timeout)
+ (thread-terminate! th1))
+ "timer thread")))
+ (thread-start! th1)
+ (thread-start! th2)
+ (thread-join! th1)
+ res))))
+
+;;======================================================================
+;; S E R V E R U T I L I T I E S
+;;======================================================================
+
+;; run ping in separate process, safest way in some cases
+;;
+#;(define (server:ping-server ifaceport)
+ (with-input-from-pipe
+ (conc (common:get-megatest-exe) " -ping " ifaceport)
+ (lambda ()
+ (let loop ((inl (read-line))
+ (res "NOREPLY"))
+ (if (eof-object? inl)
+ (case (string->symbol res)
+ ((NOREPLY) #f)
+ ((LOGIN_OK) #t)
+ (else #f))
+ (loop (read-line) inl))))))
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;;
+#;(define (server:login toppath)
+ (lambda (toppath)
+ (set! *db-last-access* (current-seconds)) ;; might not be needed.
+ (if (equal? *toppath* toppath)
+ #t
+ #f)))
+
+;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; (define (server:release-sync-lock)
+;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; (define (server:have-sync-lock?)
+;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;; (have-lock? (car have-lock-pair))
+;; (lock-time (cdr have-lock-pair))
+;; (lock-age (- (current-seconds) lock-time)))
+;; (cond
+;; (have-lock? #t)
+;; ((>lock-age
+;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;; (server:release-sync-lock)
+;; (server:have-sync-lock?))
+;; (else #f))))
+
+)
Index: ulex/ulex.scm
==================================================================
--- ulex/ulex.scm
+++ ulex/ulex.scm
@@ -1,8 +1,8 @@
;; ulex: Distributed sqlite3 db
;;;
-;; Copyright (C) 2018 Matt Welland
+;; Copyright (C) 2018-2021 Matt Welland
;; Redistribution and use in source and binary forms, with or without
;; modification, is permitted.
;;
;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -23,330 +23,521 @@
;; NOTES:
;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity.
;;
;;======================================================================
-(use mailbox)
-
-(module ulex
- *
-
-(import scheme posix chicken data-structures ports extras files mailbox)
-(import srfi-18 pkts matchable regex
- typed-records srfi-69 srfi-1
- srfi-4 regex-case
- (prefix sqlite3 sqlite3:)
- foreign
- tcp6
- ;; ulex-netutil
- hostinfo
- )
-
-;; make it a global? Well, it is local to area module
-
-(define *captain-pktspec*
- `((captain (host . h)
- (port . p)
- (pid . i)
- (ipaddr . a)
- )
- #;(data (hostname . h) ;; sender hostname
- (port . p) ;; sender port
- (ipaddr . a) ;; sender ip
- (hostkey . k) ;; sending host key - store info at server under this key
- (servkey . s) ;; server key - this needs to match at server end or reject the msg
- (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json
- (data . d) ;; base64 encoded slln data
- )))
-
-;; struct for keeping track of our world
-
-(defstruct udat
- ;; captain info
- (captain-address #f)
- (captain-host #f)
- (captain-port #f)
- (captain-pid #f)
- (captain-lease 0) ;; time (unix epoc) seconds when the lease is up
- (ulex-dir (conc (get-environment-variable "HOME") "/.ulex"))
- (cpkts-dir (conc (get-environment-variable "HOME") "/.ulex/pkts"))
- (cpkt-spec *captain-pktspec*)
- ;; this processes info
- (my-cpkt-key #f) ;; put Z card here when I create a pkt for myself as captain
- (my-address #f)
- (my-hostname #f)
- (my-port #f)
- (my-pid (current-process-id))
- (my-dbs '())
- ;; server and handler thread
- (serv-listener #f) ;; this processes server info
- (handler-thread #f)
- (mboxes (make-hash-table)) ;; key => mbox
- ;; other servers
- (peers (make-hash-table)) ;; host-port => peer record
- (dbowners (make-hash-table)) ;; dbfile => host-port
- (handlers (make-hash-table)) ;; dbfile => proc
- ;; (outgoing-conns (make-hash-table)) ;; host:port -> conn
- (work-queue (make-queue)) ;; most stuff goes here
- ;; (fast-queue (make-queue)) ;; super quick stuff goes here (e.g. ping)
- (busy #f) ;; is either of the queues busy, use to switch between queuing tasks or doing immediately
- ;; app info
- (appname #f)
- (dbtypes (make-hash-table)) ;; this should be an alist but hash is easier. dbtype => [ initproc syncproc ]
- ;; cookies
- (cnum 0) ;; cookie num
- )
-
-;;======================================================================
-;; NEW APPROACH
-;;======================================================================
-
-;; start-server-find-port ;; gotta have a server port ready from the very begining
-
-;; udata - all the connection info, captain, server, ulex db etc. MUST BE PASSED IN
-;; dbpath - full path and filename of the db to talk to or a symbol naming the db?
-;; callname - the remote call to execute
-;; params - parameters to pass to the remote call
-;;
-(define (remote-call udata dbpath dbtype callname . params)
- (start-server-find-port udata) ;; ensure we have a local server
- (find-or-setup-captain udata)
- ;; look at connect, process-request, send, send-receive
- (let-values (((cookie-key host-port)(get-db-owner udata dbpath dbtype)))
- (send-receive udata host-port callname cookie-key params)))
-
-;;======================================================================
-;; KEY FUNCTIONS - THESE ARE TOO BE EXPOSED AND USED
-;;======================================================================
-
-;; connection setup and management functions
-
-;; This is the basic setup command. Must always be
-;; called before connecting to a db using connect.
-;;
-;; find or become the captain
-;; setup and return a ulex object
-;;
-(define (find-or-setup-captain udata)
- ;; see if we already have a captain and if the lease is ok
- (if (and (udat-captain-address udata)
- (udat-captain-port udata)
- (< (current-seconds) (udat-captain-lease udata)))
- udata
- (let* ((cpkts (get-all-captain-pkts udata)) ;; read captain pkts
- (captn (get-winning-pkt cpkts)))
- (if captn
- (let* ((port (alist-ref 'port captn))
- (host (alist-ref 'host captn))
- (ipaddr (alist-ref 'ipaddr captn))
- (pid (alist-ref 'pid captn))
- (Z (alist-ref 'Z captn)))
- (udat-captain-address-set! udata ipaddr)
- (udat-captain-host-set! udata host)
- (udat-captain-port-set! udata port)
- (udat-captain-pid-set! udata pid)
- (udat-captain-lease-set! udata (+ (current-seconds) 10))
- (let-values (((success pingtime)(ping udata (conc ipaddr ":" port))))
- (if success
- udata
- (begin
- (print "Found unreachable captain at " ipaddr ":" port ", removing pkt")
- (remove-captain-pkt udata captn)
- (find-or-setup-captain udata))))
- (begin
- (setup-as-captain udata) ;; this saves the thread to captain-thread and starts the thread
- (find-or-setup-captain udata)))))))
-
-;; connect to a specific dbfile
-;; - if already connected - return the dbowner host-port
-;; - ask the captain who to talk to for this db
-;; - put the entry in the dbowners hash as dbfile => host-port
-;;
-(define (connect udata dbfname dbtype)
- (or (hash-table-ref/default (udat-dbowners udata) dbfname #f)
- (let-values (((success dbowner-host-port)(get-db-owner udata dbfname dbtype)))
- (if success
- (begin
- ;; just clobber the record, this is the new data no matter what
- (hash-table-set! (udat-dbowners udata) dbfname dbowner-host-port)
- dbowner-host-port)
- #f))))
-
-;; returns: success pingtime
-;;
-;; NOTE: causes the callee to store the info on this host along with the dbs this host currently owns
-;;
-(define (ping udata host-port)
- (let* ((start (current-milliseconds))
- (cookie (make-cookie udata))
- (dbs (udat-my-dbs udata))
- (msg (string-intersperse dbs " "))
- (res (send udata host-port 'ping cookie msg retval: #t))
- (delta (- (current-milliseconds) start)))
- (values (equal? res cookie) delta)))
-
-;; returns: success pingtime
-;;
-;; NOTE: causes all references to this worker to be wiped out in the
-;; callee (ususally the captain)
-;;
-(define (goodbye-ping udata host-port)
- (let* ((start (current-milliseconds))
- (cookie (make-cookie udata))
- (dbs (udat-my-dbs udata))
- (res (send udata host-port 'goodbye cookie "nomsg" retval: #t))
- (delta (- (current-milliseconds) start)))
- (values (equal? res cookie) delta)))
-
-(define (goodbye-captain udata)
- (let* ((host-port (udat-captain-host-port udata)))
- (if host-port
- (goodbye-ping udata host-port)
- (values #f -1))))
-
-(define (get-db-owner udata dbname dbtype)
- (let* ((host-port (udat-captain-host-port udata)))
- (if host-port
- (let* ((cookie (make-cookie udata))
- (msg #f) ;; (conc dbname " " dbtype))
- (params `(,dbname ,dbtype))
- (res (send udata host-port 'db-owner cookie msg
- params: params retval: #t)))
- (match (string-split res)
- ((retcookie owner-host-port)
- (values (equal? retcookie cookie) owner-host-port))))
- (values #f -1))))
-
-;; called in ulex-handler to dispatch work, called on the workers side
-;; calls (proc params data)
-;; returns result with cookie
-;;
-;; pdat is the info of the caller, used to send the result data
-;; prockey is key into udat-handlers hash dereferencing a proc
-;; procparam is a first param handed to proc - often to do further derefrencing
-;; NOTE: params is intended to be a list of strings, encoding on data
-;; is up to the user but data must be a single line
-;;
-(define (process-request udata pdat dbname cookie prockey procparam data)
- (let* ((dbrec (ulex-open-db udata dbname)) ;; this will be a dbconn record, looks for in udata first
- (proc (hash-table-ref udata prockey)))
- (let* ((result (proc dbrec procparam data)))
- result)))
-
-;; remote-request - send to remote to process in process-request
-;; uconn comes from a call to connect and can be used instead of calling connect again
-;; uconn is the host-port to call
-;; we send dbname to the worker so they know which file to open
-;; data must be a string with no newlines, it will be handed to the proc
-;; at the remote site unchanged. It is up to the user to encode/decode it's contents
-;;
-;; rtype: immediate, read-only, normal, low-priority
-;;
-(define (remote-request udata uconn rtype dbname prockey procparam data)
- (let* ((cookie (make-cookie udata)))
- (send-receive udata uconn rtype cookie data `(,prockey procparam))))
-
-(define (ulex-open-db udata dbname)
- #f)
-
-
-;;======================================================================
-;; Ulex db
-;;
-;; - track who is captain, lease expire time
-;; - track who owns what db, lease
-;;
-;;======================================================================
-
-;;
-;;
-(define (ulex-dbfname)
- (let ((dbdir (conc (get-environment-variable "HOME") "/.ulex")))
- (if (not (file-exists? dbdir))
- (create-directory dbdir #t))
- (conc dbdir "/network.db")))
-
-;; always goes in ~/.ulex/network.db
-;; role is captain, adjutant, node
-;;
-(define (ulexdb-setup)
- (let* ((dbfname (ulex-dbfname))
- (have-db (file-exists? dbfname))
- (db (sqlite3:open-database dbfname)))
- (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
- (sqlite3:execute db "PRAGMA synchronous = 0;")
- (if (not have-db)
- (sqlite3:with-transaction
- db
- (lambda ()
- (for-each
- (lambda (stmt)
- (if stmt (sqlite3:execute db stmt)))
- `("CREATE TABLE IF NOT EXISTS nodes
- (id INTEGER PRIMARY KEY,
- role TEXT NOT NULL,
- host TEXT NOT NULL,
- port TEXT NOT NULL,
- ipadr TEXT NOT NULL,
- pid INTEGER NOT NULL,
- zcard TEXT NOT NULL,
- regtime INTEGER DEFAULT (strftime('%s','now')),
- lease_thru INTEGER DEFAULT (strftime('%s','now')),
- last_update INTEGER DEFAULT (strftime('%s','now')));"
- "CREATE TRIGGER IF NOT EXISTS update_nodes_trigger AFTER UPDATE ON nodes
- FOR EACH ROW
- BEGIN
- UPDATE nodes SET last_update=(strftime('%s','now'))
- WHERE id=old.id;
- END;"
- "CREATE TABLE IF NOT EXISTS dbs
- (id INTEGER PRIMARY KEY,
- dbname TEXT NOT NULL,
- dbfile TEXT NOT NULL,
- dbtype TEXT NOT NULL,
- host_port TEXT NOT NULL,
- regtime INTEGER DEFAULT (strftime('%s','now')),
- lease_thru INTEGER DEFAULT (strftime('%s','now')),
- last_update INTEGER DEFAULT (strftime('%s','now')));"
- "CREATE TRIGGER IF NOT EXISTS update_dbs_trigger AFTER UPDATE ON dbs
- FOR EACH ROW
- BEGIN
- UPDATE dbs SET last_update=(strftime('%s','now'))
- WHERE id=old.id;
- END;")))))
- db))
-
-(define (get-host-port-lease db dbfname)
- (sqlite3:fold-row
- (lambda (rem host-port lease-thru)
- (list host-port lease-thru))
- #f db "SELECT host_port,lease_thru FROM dbs WHERE dbfile = ?" dbfname))
-
-(define (register-captain db host ipadr port pid zcard #!key (lease 20))
- (let* ((dbfname (ulex-dbfname))
- (host-port (conc host ":" port)))
- (sqlite3:with-transaction
- db
- (lambda ()
- (match (get-host-port-lease db dbfname)
- ((host-port lease-thru)
- (if (> (current-seconds) lease-thru)
- (begin
- (sqlite3:execute db "UPDATE dbs SET host_port=?,lease_thru=? WHERE dbname=?"
- (conc host ":" port)
- (+ (current-seconds) lease)
- dbfname)
- #t)
- #f))
- (#f (sqlite3:execute db "INSERT INTO dbs (dbname,dbfile,dbtype,host_port,lease_thru) VALUES (?,?,?,?,?)"
- "captain" dbfname "captain" host-port (+ (current-seconds) lease)))
- (else (print "ERROR: Unrecognised result from fold-row")
- (exit 1)))))))
-
+(module ulex
+ *
+ #;(
+
+ ;; NOTE: looking for the handler proc - find the run-listener :)
+
+ run-listener ;; (run-listener handler-proc [port]) => uconn
+
+ ;; NOTE: handler-proc params;
+ ;; (handler-proc rem-host-port qrykey cmd params)
+
+ send-receive ;; (send-receive uconn host-port cmd data)
+
+ ;; NOTE: cmd can be any plain text symbol except for these;
+ ;; 'ping 'ack 'goodbye 'response
+
+ set-work-handler ;; (set-work-handler proc)
+
+ wait-and-close ;; (wait-and-close uconn)
+
+ ulex-listener?
+
+ ;; needed to get the interface:port that was automatically found
+ udat-port
+ udat-host-port
+
+ ;; for testing only
+ ;; pp-uconn
+
+ ;; parameters
+ work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct
+ return-method ;; parameter; 'mailbox, 'polling, 'direct
+ )
+
+(import scheme
+ chicken.base
+ chicken.file
+ chicken.io
+ chicken.time
+ chicken.condition
+ chicken.string
+ chicken.sort
+ chicken.pretty-print
+
+ address-info
+ mailbox
+ matchable
+ ;; queues
+ regex
+ regex-case
+ simple-exceptions
+ s11n
+ srfi-1
+ srfi-18
+ srfi-4
+ srfi-69
+ system-information
+ tcp6
+ typed-records
+ )
+
+;; udat struct, used by both caller and callee
+;; instantiated as uconn by convention
+;;
+(defstruct udat
+ ;; the listener side
+ (port #f)
+ (host-port #f)
+ (socket #f)
+ ;; the peers
+ (peers (make-hash-table)) ;; host:port->peer
+ ;; work handling
+ (work-queue (make-mailbox))
+ (work-proc #f) ;; set by user
+ (cnum 0) ;; cookie number
+ (mboxes (make-hash-table)) ;; for the replies
+ (avail-cmboxes '()) ;; list of ( . ) for re-use
+ ;; threads
+ (numthreads 10)
+ (cmd-thread #f)
+ (work-queue-thread #f)
+ (num-threads-running 0)
+ )
+
+;; Parameters
+
+;; work-method:
+(define work-method (make-parameter 'mailbox))
+;; mailbox - all rdat goes through mailbox
+;; threads - all rdat immediately executed in new thread
+;; direct - no queuing
+;;
+
+;; return-method, return the result to waiting send-receive:
+(define return-method (make-parameter 'mailbox))
+;; mailbox - create a mailbox and use it for passing returning results to send-receive
+;; polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result
+;; direct - no queuing, result is passed back in single tcp connection
+;;
+
+;; ;; struct for keeping track of others we are talking to
+;; ;;
+;; (defstruct pdat
+;; (host-port #f)
+;; (conns '()) ;; list of pcon structs, pop one off when calling the peer
+;; )
+;;
+;; ;; struct for peer connections, keep track of expiration etc.
+;; ;;
+;; (defstruct pcon
+;; (inp #f)
+;; (oup #f)
+;; (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59)
+;; (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes
+;; )
+
+;;======================================================================
+;; listener
+;;======================================================================
+
+;; is uconn a ulex connector (listener)
+;;
+(define (ulex-listener? uconn)
+ (udat? uconn))
+
+;; create a tcp listener and return a populated udat struct with
+;; my port, address, hostname, pid etc.
+;; return #f if fail to find a port to allocate.
+;;
+;; if udata-in is #f create the record
+;; if there is already a serv-listener return the udata
+;;
+(define (setup-listener uconn #!optional (port 4242))
+ (handle-exceptions
+ exn
+ (if (< port 65535)
+ (setup-listener uconn (+ port 1))
+ #f)
+ (connect-listener uconn port)))
+
+(define (connect-listener uconn port)
+ ;; (tcp-listener-socket LISTENER)(socket-name so)
+ ;; sockaddr-address, sockaddr-port, sockaddr->string
+ (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
+ (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
+ (udat-port-set! uconn port)
+ (udat-host-port-set! uconn (conc addr":"port))
+ (udat-socket-set! uconn tlsn)
+ uconn))
+
+;; run-listener does all the work of starting a listener in a thread
+;; it then returns control
+;;
+(define (run-listener handler-proc #!optional (port-suggestion 4242))
+ (let* ((uconn (make-udat)))
+ (udat-work-proc-set! uconn handler-proc)
+ (if (setup-listener uconn port-suggestion)
+ (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop"))
+ (th2 (make-thread (lambda ()
+ (case (work-method)
+ ((mailbox limited)
+ (process-work-queue uconn))))
+ "Ulex work queue processor")))
+ ;; (tcp-buffer-size 2048)
+ (thread-start! th1)
+ (thread-start! th2)
+ (udat-cmd-thread-set! uconn th1)
+ (udat-work-queue-thread-set! uconn th2)
+ (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".")
+ uconn)
+ (assert #f "ERROR: run-listener called without proper setup."))))
+
+(define (wait-and-close uconn)
+ (thread-join! (udat-cmd-thread uconn))
+ (tcp-close (udat-socket uconn)))
+
+;;======================================================================
+;; peers and connections
+;;======================================================================
+
+(define *send-mutex* (make-mutex))
+
+;; send structured data to recipient
+;;
+;; NOTE: qrykey is what was called the "cookie" previously
+;;
+;; retval tells send to expect and wait for return data (one line) and return it or time out
+;; this is for ping where we don't want to necessarily have set up our own server yet.
+;;
+;; NOTE: see below for beginnings of code to allow re-use of tcp connections
+;; - I believe (without substantial evidence) that re-using connections will
+;; be beneficial ...
+;;
+(define (send udata host-port qrykey cmd params)
+ (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this
+ (isme #f #;(equal? host-port my-host-port)) ;; calling myself?
+ ;; dat is a self-contained work block that can be sent or handled locally
+ (dat (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
+ (cond
+ (isme (ulex-handler udata dat)) ;; no transmission needed
+ (else
+ (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
+ exn
+ (message exn)
+ (begin
+ ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ (let-values (((inp oup)(tcp-connect host-port)))
+ (let ((res (if (and inp oup)
+ (begin
+ (serialize dat oup)
+ (close-output-port oup)
+ (deserialize inp)
+ )
+ (begin
+ (print "ERROR: send called but no receiver has been setup. Please call setup first!")
+ #f))))
+ (close-input-port inp)
+ ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ res)))))))) ;; res will always be 'ack unless return-method is direct
+
+(define (send-via-polling uconn host-port cmd data)
+ (let* ((qrykey (make-cookie uconn))
+ (sres (send uconn host-port qrykey cmd data)))
+ (case sres
+ ((ack)
+ (let loop ((start-time (current-milliseconds)))
+ (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
+ (begin
+ (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
+ #f)
+ (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
+ (if result ;; result is '(status . result-data) or #f for nothing yet
+ (begin
+ (hash-table-delete! (udat-mboxes uconn) qrykey)
+ (cdr result))
+ (begin
+ (thread-sleep! 0.01)
+ (loop start-time)))))))
+ (else
+ (print "ULEX ERROR: Communication failed? sres="sres)
+ #f))))
+
+(define (send-via-mailbox uconn host-port cmd data)
+ (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
+ (qrykey (car cmbox))
+ (mbox (cdr cmbox))
+ (mbox-time (current-milliseconds))
+ (sres (send uconn host-port qrykey cmd data))) ;; short res
+ (if (eq? sres 'ack) ;; BUG: change to be less than server:expiration-timeout?
+ (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread)))
+ #f
+ 120)) ;; timeout)
+ (mbox-timeout-result 'MBOX_TIMEOUT)
+ (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
+ (mbox-receive-time (current-milliseconds)))
+ ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
+ (hash-table-delete! (udat-mboxes uconn) qrykey)
+ (if (eq? res 'MBOX_TIMEOUT)
+ (begin
+ (print "WARNING: mbox timed out for query "cmd", with data "data
+ ", waiting for response from "host-port".")
+
+ ;; here it might make sense to clean up connection records and force clean start?
+ ;; NO. The progam using ulex needs to do the reset. Right thing here is exception
+
+ #f) ;; convert to raising exception?
+ res))
+ (begin
+ (print "ERROR: Communication failed? Got "sres)
+ #f))))
+
+;; send a request to the given host-port and register a mailbox in udata
+;; wait for the mailbox data and return it
+;;
+(define (send-receive uconn host-port cmd data)
+ (let* ((start-time (current-milliseconds))
+ (result (cond
+ ((member cmd '(ping goodbye)) ;; these are immediate
+ (send uconn host-port 'ping cmd data))
+ ((eq? (work-method) 'direct)
+ ;; the result from send will be the actual result, not an 'ack
+ (send uconn host-port 'direct cmd data))
+ (else
+ (case (return-method)
+ ((polling)
+ (send-via-polling uconn host-port cmd data))
+ ((mailbox)
+ (send-via-mailbox uconn host-port cmd data))
+ (else
+ (print "ULEX ERROR: unrecognised return-method "(return-method)".")
+ #f)))))
+ (duration (- (current-milliseconds) start-time)))
+ ;; this is ONLY for development and debugging. It will be removed once Ulex is stable.
+ (if (< 5000 duration)
+ (print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000)))
+ " seconds; "cmd", host-port="host-port", data="data))
+ result))
+
+
+;;======================================================================
+;; responder side
+;;======================================================================
+
+;; take a request, rdat, and if not immediate put it in the work queue
+;;
+;; Reserved cmds; ack ping goodbye response
+;;
+(define (ulex-handler uconn rdat)
+ (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
+ (match rdat ;; (string-split controldat)
+ ((rem-host-port qrykey cmd params);; timedata)
+ ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
+ (case cmd
+ ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
+ ((ping)
+ ;; (print "Got Ping!")
+ ;; (add-to-work-queue uconn rdat)
+ 'ack)
+ ((goodbye)
+ ;; just clear out references to the caller. NOT COMPLETE
+ (add-to-work-queue uconn rdat)
+ 'ack)
+ ((response) ;; this is a result from remote processing, send it as mail ...
+ (case (return-method)
+ ((polling)
+ (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params))
+ 'ack)
+ ((mailbox)
+ (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f)))
+ (if mbox
+ (begin
+ (mailbox-send! mbox params) ;; params here is our result
+ 'ack)
+ (begin
+ (print "ERROR: received result but no associated mbox for cookie "qrykey)
+ 'no-mbox-found))))
+ (else (print "ULEX ERROR: unrecognised return-method "(return-method))
+ 'bad-return-method)))
+ (else ;; generic request - hand it to the work queue
+ (add-to-work-queue uconn rdat)
+ 'ack)))
+ (else
+ (print "ULEX ERROR: bad rdat "rdat)
+ 'bad-rdat)))
+
+;; given an already set up uconn start the cmd-loop
+;;
+(define (ulex-cmd-loop uconn)
+ (let* ((serv-listener (udat-socket uconn))
+ (listener (lambda ()
+ (let loop ((state 'start))
+ (let-values (((inp oup)(tcp-accept serv-listener)))
+ ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params)
+ (resp (ulex-handler uconn rdat)))
+ (serialize resp oup)
+ (close-input-port inp)
+ (close-output-port oup)
+ ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ )
+ (loop state))))))
+ ;; start N of them
+ (let loop ((thnum 0)
+ (threads '()))
+ (if (< thnum 100)
+ (let* ((th (make-thread listener (conc "listener" thnum))))
+ (thread-start! th)
+ (loop (+ thnum 1)
+ (cons th threads)))
+ (map thread-join! threads)))))
+
+;; add a proc to the cmd list, these are done symetrically (i.e. in all instances)
+;; so that the proc can be dereferenced remotely
+;;
+(define (set-work-handler uconn proc)
+ (udat-work-proc-set! uconn proc))
+
+;;======================================================================
+;; work queues - this is all happening on the listener side
+;;======================================================================
+
+;; rdat is (rem-host-port qrykey cmd params)
+
+(define (add-to-work-queue uconn rdat)
+ #;(queue-add! (udat-work-queue uconn) rdat)
+ (case (work-method)
+ ((threads)
+ (thread-start! (make-thread (lambda ()
+ (do-work uconn rdat))
+ "worker thread")))
+ ((mailbox)
+ (mailbox-send! (udat-work-queue uconn) rdat))
+ ((direct)
+ (do-work uconn rdat))
+ (else
+ (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.")
+ (mailbox-send! (udat-work-queue uconn) rdat))))
+
+;; move the logic to return the result somewhere else?
+;;
+(define (do-work uconn rdat)
+ (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change
+ ;; put this following into a do-work procedure
+ (match rdat
+ ((rem-host-port qrykey cmd params)
+ (let* ((start-time (current-milliseconds))
+ (result (proc rem-host-port qrykey cmd params))
+ (end-time (current-milliseconds))
+ (run-time (- end-time start-time)))
+ (case (work-method)
+ ((direct) result)
+ (else
+ (if (> run-time 1000)(print "ULEX: Warning, work "cmd", "params" done in "run-time" ms"))
+ ;; send 'response as cmd and result as params
+ (send uconn rem-host-port qrykey 'response result) ;; could check for ack
+ (let* ((duration (- (current-milliseconds) end-time)))
+ (if (> duration 500)(print "ULEX: Warning, response sent back to "rem-host-port" for "qrykey" in "duration"ms")))))))
+ (MBOX_TIMEOUT 'do-work-timeout)
+ (else
+ (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))))
+
+;; NEW APPROACH:
+;;
+(define (process-work-queue uconn)
+ (let ((wqueue (udat-work-queue uconn))
+ (proc (udat-work-proc uconn))
+ (numthr (udat-numthreads uconn)))
+ (let loop ((thnum 1)
+ (threads '()))
+ (let ((thlst (cons (make-thread (lambda ()
+ (let work-loop ()
+ (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT)))
+ (do-work uconn rdat))
+ (work-loop)))
+ (conc "work thread " thnum))
+ threads)))
+ (if (< thnum numthr)
+ (loop (+ thnum 1)
+ thlst)
+ (begin
+ (print "ULEX: Starting "(length thlst)" worker threads.")
+ (map thread-start! thlst)
+ (print "ULEX: Threads started. Joining all.")
+ (map thread-join! thlst)))))))
+
+;; below was to enable re-use of connections. This seems non-trivial so for
+;; now lets open on each call
+;;
+;; ;; given host-port get or create peer struct
+;; ;;
+;; (define (udat-get-peer uconn host-port)
+;; (or (hash-table-ref/default (udat-peers uconn) host-port #f)
+;; ;; no peer, so create pdat and init it
+;;
+;; ;; NEED stack of connections, pop and use; inp, oup,
+;; ;; creation_time (remove and create new if over 24hrs old
+;; ;;
+;; (let ((pdat (make-pdat host-port: host-port)))
+;; (hash-table-set! (udat-peers uconn) host-port pdat)
+;; pdat)))
+;;
+;; ;; is pcon alive
+;;
+;; ;; given host-port and pdat get a pcon
+;; ;;
+;; (define (pdat-get-pcon pdat host-port)
+;; (let loop ((conns (pdat-conns pdat)))
+;; (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later
+;; (init-pcon (make-pcon))
+;; (let* ((conn (pop conns)))
+;;
+;; ;; given host-port get a pcon struct
+;; ;;
+;; (define (udat-get-pcon
+
+;;======================================================================
+;; misc utils
+;;======================================================================
+
+(define (make-cookie uconn)
+ (let ((newcnum (+ (udat-cnum uconn) 1)))
+ (udat-cnum-set! uconn newcnum)
+ (conc (udat-host-port uconn) ":"
+ newcnum)))
+
+;; cookie/mboxes
+
+;; we store each mbox with a cookie ( . )
+;;
+(define (get-cmbox uconn)
+ (if (null? (udat-avail-cmboxes uconn))
+ (let ((cookie (make-cookie uconn))
+ (mbox (make-mailbox)))
+ (hash-table-set! (udat-mboxes uconn) cookie mbox)
+ `(,cookie . ,mbox))
+ (let ((cmbox (car (udat-avail-cmboxes uconn))))
+ (udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn)))
+ cmbox)))
+
+(define (put-cmbox uconn cmbox)
+ (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn))))
+
+(define (pp-uconn uconn)
+ (pp (udat->alist uconn)))
+
+
;;======================================================================
;; network utilities
;;======================================================================
+
+;; NOTE: Look at address-info egg as alternative to some of this
(define (rate-ip ipaddr)
(regex-case ipaddr
( "^127\\..*" _ 0 )
( "^(10\\.0|192\\.168)\\..*" _ 1 )
@@ -354,1899 +545,26 @@
;; Change this to bias for addresses with a reasonable broadcast value?
;;
(define (ip-pref-less? a b)
(> (rate-ip a) (rate-ip b)))
-
(define (get-my-best-address)
- (let ((all-my-addresses (get-all-ips))
- ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))
- )
+ (let ((all-my-addresses (get-all-ips)))
(cond
((null? all-my-addresses)
(get-host-name)) ;; no interfaces?
((eq? (length all-my-addresses) 1)
(car all-my-addresses)) ;; only one to choose from, just go with it
-
(else
- (car (sort all-my-addresses ip-pref-less?)))
- ;; (else
- ;; (ip->string (car (filter (lambda (x) ;; take any but 127.
- ;; (not (eq? (u8vector-ref x 0) 127)))
- ;; all-my-addresses))))
-
- )))
+ (car (sort all-my-addresses ip-pref-less?))))))
(define (get-all-ips-sorted)
(sort (get-all-ips) ip-pref-less?))
(define (get-all-ips)
- (map ip->string (vector->list
- (hostinfo-addresses
- (host-information (current-hostname))))))
-
-(define (udat-my-host-port udata)
- (if (and (udat-my-address udata)(udat-my-port udata))
- (conc (udat-my-address udata) ":" (udat-my-port udata))
- #f))
-
-(define (udat-captain-host-port udata)
- (if (and (udat-captain-address udata)(udat-captain-port udata))
- (conc (udat-captain-address udata) ":" (udat-captain-port udata))
- #f))
-
-(define (udat-get-peer udata host-port)
- (hash-table-ref/default (udat-peers udata) host-port #f))
-
-;; struct for keeping track of others we are talking to
-
-(defstruct peer
- (addr-port #f)
- (hostname #f)
- (pid #f)
- ;; (inp #f)
- ;; (oup #f)
- (dbs '()) ;; list of databases this peer is currently handling
- )
-
-(defstruct work
- (peer-dat #f)
- (handlerkey #f)
- (qrykey #f)
- (data #f)
- (start (current-milliseconds)))
-
-#;(defstruct dbowner
- (pdat #f)
- (last-update (current-seconds)))
-
-;;======================================================================
-;; Captain functions
-;;======================================================================
-
-;; NB// This needs to be started in a thread
-;;
-;; setup to be a captain
-;; - local server MUST be started already
-;; - create pkt
-;; - start server port handler
-;;
-(define (setup-as-captain udata)
- (if (create-captain-pkt udata)
- (let* ((my-addr (udat-my-address udata))
- (my-port (udat-my-port udata))
- (th (make-thread (lambda ()
- (ulex-handler-loop udata)) "Captain handler")))
- (udat-handler-thread-set! udata th)
- (udat-captain-address-set! udata my-addr)
- (udat-captain-port-set! udata my-port)
- (thread-start! th))
- (begin
- (print "ERROR: failed to create captain pkt")
- #f)))
-
-;; given a pkts dir read
-;;
-(define (get-all-captain-pkts udata)
- (let* ((pktsdir (let ((d (udat-cpkts-dir udata)))
- (if (file-exists? d)
- d
- (begin
- (create-directory d #t)
- d))))
- (all-pkt-files (glob (conc pktsdir "/*.pkt")))
- (pkt-spec (udat-cpkt-spec udata)))
- (map (lambda (pkt-file)
- (read-pkt->alist pkt-file pktspec: pkt-spec))
- all-pkt-files)))
-
-;; sort by D then Z, return one, choose the oldest then
-;; differentiate if needed using the Z key
-;;l
-(define (get-winning-pkt pkts)
- (if (null? pkts)
- #f
- (car (sort pkts (lambda (a b)
- (let ((ad (string->number (alist-ref 'D a)))
- (bd (string->number (alist-ref 'D b))))
- (if (eq? a b)
- (let ((az (alist-ref 'Z a))
- (bz (alist-ref 'Z b)))
- (string>=? az bz))
- (> ad bd))))))))
-
-;; put the host, ip, port and pid into a pkt in
-;; the captain pkts dir
-;; - assumes user has already fired up a server
-;; which will be in the udata struct
-;;
-(define (create-captain-pkt udata)
- (if (not (udat-serv-listener udata))
- (begin
- (print "ERROR: create-captain-pkt called with out a listener")
- #f)
- (let* ((pktdat `((port . ,(udat-my-port udata))
- (host . ,(udat-my-hostname udata))
- (ipaddr . ,(udat-my-address udata))
- (pid . ,(udat-my-pid udata))))
- (pktdir (udat-cpkts-dir udata))
- (pktspec (udat-cpkt-spec udata))
- )
- (udat-my-cpkt-key-set!
- udata
- (write-alist->pkt
- pktdir
- pktdat
- pktspec: pktspec
- ptype: 'captain))
- (udat-my-cpkt-key udata))))
-
-;; remove pkt associated with captn (the Z key .pkt)
-;;
-(define (remove-captain-pkt udata captn)
- (let ((Z (alist-ref 'Z captn))
- (cpktdir (udat-cpkts-dir udata)))
- (delete-file* (conc cpktdir "/" Z ".pkt"))))
-
-;; call all known peers and tell them to delete their info on the captain
-;; thus forcing them to re-read pkts and connect to a new captain
-;; call this when the captain needs to exit and if an older captain is
-;; detected. Due to delays in sending file meta data in NFS multiple
-;; captains can be initiated in a "Storm of Captains", book soon to be
-;; on Amazon
-;;
-(define (drop-captain udata)
- (let* ((peers (hash-table-keys (udat-peers udata)))
- (cookie (make-cookie udata)))
- (for-each
- (lambda (host-port)
- (send udata host-port 'dropcaptain cookie "nomsg" retval: #t))
- peers)))
-
-;;======================================================================
-;; server primitives
-;;======================================================================
-
-(define (make-cookie udata)
- (let ((newcnum (+ (udat-cnum udata) 1)))
- (udat-cnum-set! udata newcnum)
- (conc (udat-my-address udata) ":"
- (udat-my-port udata) "-"
- (udat-my-pid udata) "-"
- newcnum)))
-
-;; create a tcp listener and return a populated udat struct with
-;; my port, address, hostname, pid etc.
-;; return #f if fail to find a port to allocate.
-;;
-;; if udata-in is #f create the record
-;; if there is already a serv-listener return the udata
-;;
-(define (start-server-find-port udata-in #!optional (port 4242))
- (let ((udata (or udata-in (make-udat))))
- (if (udat-serv-listener udata) ;; TODO - add check that the listener is alive and ready?
- udata
- (handle-exceptions
- exn
- (if (< port 65535)
- (start-server-find-port udata (+ port 1))
- #f)
- (connect-server udata port)))))
-
-(define (connect-server udata port)
- ;; (tcp-listener-socket LISTENER)(socket-name so)
- ;; sockaddr-address, sockaddr-port, sockaddr->string
- (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
- (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
- (udat-my-address-set! udata addr)
- (udat-my-port-set! udata port)
- (udat-my-hostname-set! udata (get-host-name))
- (udat-serv-listener-set! udata tlsn)
- udata))
-
-(define (get-peer-dat udata host-port #!optional (hostname #f)(pid #f))
- (let* ((pdat (or (udat-get-peer udata host-port)
- (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE SPECIFIC
- exn
- #f
- (let ((npdat (make-peer addr-port: host-port)))
- (if hostname (peer-hostname-set! npdat hostname))
- (if pid (peer-pid-set! npdat pid))
- npdat)))))
- pdat))
-
-;; send structured data to recipient
-;;
-;; NOTE: qrykey is what was called the "cookie" previously
-;;
-;; retval tells send to expect and wait for return data (one line) and return it or time out
-;; this is for ping where we don't want to necessarily have set up our own server yet.
-;;
-(define (send udata host-port handler qrykey data
- #!key (hostname #f)(pid #f)(params '())(retval #f))
- (let* ((my-host-port (udat-my-host-port udata))
- (isme (equal? host-port my-host-port)) ;; am I calling
- ;; myself?
- (dat (list
- handler ;; " "
- my-host-port ;; " "
- (udat-my-pid udata) ;; " "
- qrykey
- params ;;(if (null? params) "" (conc " "
- ;;(string-intersperse params " ")))
- )))
- ;; (print "send isme is " (if isme "true!" "false!") ",
- ;; my-host-port: " my-host-port ", host-port: " host-port)
- (if isme
- (ulex-handler udata dat data)
- (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE
- ;; SPECIFIC
- exn
- #f
- (let-values (((inp oup)(tcp-connect host-port)))
- ;;
- ;; CONTROL LINE:
- ;; handlerkey host:port pid qrykey params ...
- ;;
- (let ((res
- (if (and inp oup)
- (let* ()
- (if my-host-port
- (begin
- (write dat oup)
- (write data oup) ;; send as sexpr
- ;; (print "Sent dat: " dat " data: " data)
- (if retval
- (read inp)
- #t))
- (begin
- (print "ERROR: send called but no receiver has been setup. Please call setup first!")
- #f))
- ;; NOTE: DO NOT BE TEMPTED TO LOOK AT ANY DATA ON INP HERE!
- ;; (there is a listener for handling that)
- )
- #f))) ;; #f means failed to connect and send
- (close-input-port inp)
- (close-output-port oup)
- res))))))
-
-;; send a request to the given host-port and register a mailbox in udata
-;; wait for the mailbox data and return it
-;;
-(define (send-receive udata host-port handler qrykey data #!key (hostname #f)(pid #f)(params '())(timeout 20))
- (let ((mbox (make-mailbox))
- (mbox-time (current-milliseconds))
- (mboxes (udat-mboxes udata)))
- (hash-table-set! mboxes qrykey mbox)
- (if (send udata host-port handler qrykey data hostname: hostname pid: pid params: params)
- (let* ((mbox-timeout-secs timeout)
- (mbox-timeout-result 'MBOX_TIMEOUT)
- (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
- (mbox-receive-time (current-milliseconds)))
- (hash-table-delete! mboxes qrykey)
- (if (eq? res 'MBOX_TIMEOUT)
- #f
- res))
- #f))) ;; #f means failed to communicate
-
-;;
-(define (ulex-handler udata controldat data)
- (print "controldat: " controldat " data: " data)
- (match controldat ;; (string-split controldat)
- ((handlerkey host-port pid qrykey params ...)
- ;; (print "handlerkey: " handlerkey " host-port: " host-port " pid: " pid " qrykey: " qrykey " params: " params)
- (case handlerkey ;; (string->symbol handlerkey)
- ((ack)(print "Got ack!"))
- ((ping) ;; special case - return result immediately on the same connection
- (let* ((proc (hash-table-ref/default (udat-handlers udata) 'ping #f))
- (val (if proc (proc) "gotping"))
- (peer (make-peer addr-port: host-port pid: pid))
- (dbshash (udat-dbowners udata)))
- (peer-dbs-set! peer params) ;; params for ping is list of dbs owned by pinger
- (for-each (lambda (dbfile)
- (hash-table-set! dbshash dbfile host-port)) ;; WRONG?
- params) ;; register each db in the dbshash
- (if (not (hash-table-exists? (udat-peers udata) host-port))
- (hash-table-set! (udat-peers udata) host-port peer)) ;; save the details of this caller in peers
- qrykey)) ;; End of ping
- ((goodbye)
- ;; remove all traces of the caller in db ownership etc.
- (let* ((peer (hash-table-ref/default (udat-peers udata) host-port #f))
- (dbs (if peer (peer-dbs peer) '()))
- (dbshash (udat-dbowners udata)))
- (for-each (lambda (dbfile)(hash-table-delete! dbshash dbfile)) dbs)
- (hash-table-delete! (udat-peers udata) host-port)
- qrykey))
- ((dropcaptain)
- ;; remove all traces of the captain
- (udat-captain-address-set! udata #f)
- (udat-captain-host-set! udata #f)
- (udat-captain-port-set! udata #f)
- (udat-captain-pid-set! udata #f)
- qrykey)
- ((rucaptain) ;; remote is asking if I'm the captain
- (if (udat-my-cpkt-key udata) "yes" "no"))
- ((db-owner) ;; given a db name who do I send my queries to
- ;; look up the file in handlers, if have an entry ping them to be sure
- ;; they are still alive and then return that host:port.
- ;; if no handler found or if the ping fails pick from peers the oldest that
- ;; is managing the fewest dbs
- (match params
- ((dbfile dbtype)
- (let* ((owner-host-port (hash-table-ref/default (udat-dbowners udata) dbfile #f)))
- (if owner-host-port
- (conc qrykey " " owner-host-port)
- (let* ((pdat (or (hash-table-ref/default (udat-peers udata) host-port #f) ;; no owner - caller gets to own it!
- (make-peer addr-port: host-port pid: pid dbs: `(,dbfile)))))
- (hash-table-set! (udat-peers udata) host-port pdat)
- (hash-table-set! (udat-dbowners udata) dbfile host-port)
- (conc qrykey " " host-port)))))
- (else (conc qrykey " BADDATA"))))
- ;; for work items:
- ;; handler is one of; immediate, read-only, read-write, high-priority
- ((immediate read-only normal low-priority) ;; do this work immediately
- ;; host-port (caller), pid (caller), qrykey (cookie), params <= all from first line
- ;; data => a single line encoded however you want, or should I build json into it?
- (print "handlerkey=" handlerkey)
- (let* ((pdat (get-peer-dat udata host-port)))
- (match params ;; dbfile prockey procparam
- ((dbfile prockey procparam)
- (case handlerkey
- ((immediate read-only)
- (process-request udata pdat dbfile qrykey prockey procparam data))
- ((normal low-priority) ;; split off later and add logic to support low priority
- (add-to-work-queue udata pdat dbfile qrykey prockey procparam data))
- (else
- #f)))
- (else
- (print "INFO: params=" params " handlerkey=" handlerkey " controldat=" controldat)
- #f))))
- (else
- ;; (add-to-work-queue udata (get-peer-dat udata host-port) handlerkey qrykey data)
- #f)))
- (else
- (print "BAD DATA? controldat=" controldat " data=" data)
- #f)));; handles the incoming messages and dispatches to queues
-
-;;
-(define (ulex-handler-loop udata)
- (let* ((serv-listener (udat-serv-listener udata)))
- ;; data comes as two lines
- ;; handlerkey resp-addr:resp-port hostname pid qrykey [dbpath/dbfile.db]
- ;; data
- (let loop ((state 'start))
- (let-values (((inp oup)(tcp-accept serv-listener)))
- (let* ((controldat (read inp))
- (data (read inp))
- (resp (ulex-handler udata controldat data)))
- (if resp (write resp oup))
- (close-input-port inp)
- (close-output-port oup))
- (loop state)))))
-
-;; add a proc to the handler list, these are done symetrically (i.e. in all instances)
-;; so that the proc can be dereferenced remotely
-;;
-(define (register-handler udata key proc)
- (hash-table-set! (udat-handlers udata) key proc))
-
-
-;;======================================================================
-;; work queues
-;;======================================================================
-
-(define (add-to-work-queue udata peer-dat handlerkey qrykey data)
- (let ((wdat (make-work peer-dat: peer-dat handlerkey: handlerkey qrykey: qrykey data: data)))
- (if (udat-busy udata)
- (queue-add! (udat-work-queue udata) wdat)
- (process-work udata wdat)) ;; passing in wdat tells process-work to first process the passed in wdat
- ))
-
-(define (do-work udata wdat)
- #f)
-
-(define (process-work udata #!optional wdat)
- (if wdat (do-work udata wdat)) ;; process wdat
- (let ((wqueue (udat-work-queue udata)))
- (if (not (queue-empty? wqueue))
- (let loop ((wd (queue-remove! wqueue)))
- (do-work udata wd)
- (if (not (queue-empty? wqueue))
- (loop (queue-remove! wqueue)))))))
-
-;;======================================================================
-;; Generic db handling
-;; setup a inmem db instance
-;; open connection to on-disk db
-;; sync on-disk db to inmem
-;; get lock in on-disk db for dbowner of this db
-;; put sync-proc, init-proc, on-disk handle, inmem handle in dbconn stuct
-;; return the stuct
-;;======================================================================
-
-(defstruct dbconn
- (fname #f)
- (inmem #f)
- (conn #f)
- (sync #f) ;; sync proc
- (init #f) ;; init proc
- (lastsync (current-seconds))
- )
-
-(defstruct dbinfo
- (initproc #f)
- (syncproc #f))
-
-;; open inmem and disk database
-;; init with initproc
-;; return db struct
-;;
-;; appname; megatest, ulex or something else.
-;;
-(define (setup-db-connection udata fname-in appname dbtype)
- (let* ((is-ulex (eq? appname 'ulex))
- (dbinf (if is-ulex ;; ulex is a built-in special case
- (make-dbinfo initproc: ulexdb-init syncproc: ulexdb-sync)
- (hash-table-ref/default (udat-dbtypes udata) dbtype #f)))
- (initproc (dbinfo-initproc dbinf))
- (syncproc (dbinfo-syncproc dbinf))
- (fname (if is-ulex
- (conc (udat-ulex-dir udata) "/ulex.db")
- fname-in))
- (inmem-db (open-and-initdb udata #f 'inmem (dbinfo-initproc dbinf)))
- (disk-db (open-and-initdb udata fname 'disk (dbinfo-initproc dbinf))))
- (make-dbconn inmem: inmem-db conn: disk-db sync: syncproc init: initproc)))
-
-;; dest='inmem or 'disk
-;;
-(define (open-and-initdb udata filename dest init-proc)
- (let* ((inmem (eq? dest 'inmem))
- (dbfile (if inmem
- ":INMEM:"
- filename))
- (dbexists (if inmem #t (file-exists? dbfile)))
- (db (sqlite3:open-database dbfile)))
- (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
- (if (not dbexists)
- (init-proc db))
- db))
-
-
-;;======================================================================
-;; Previous Ulex db stuff
-;;======================================================================
-
-(define (ulexdb-init db inmem)
- (sqlite3:with-transaction
- db
- (lambda ()
- (for-each
- (lambda (stmt)
- (if stmt (sqlite3:execute db stmt)))
- `("CREATE TABLE IF NOT EXISTS processes
- (id INTEGER PRIMARY KEY,
- host TEXT NOT NULL,
- ipadr TEXT NOT NULL,
- port INTEGER NOT NULL,
- pid INTEGER NOT NULL,
- regtime INTEGER DEFAULT (strftime('%s','now')),
- last_update INTEGER DEFAULT (strftime('%s','now')));"
- (if inmem
- "CREATE TRIGGER IF NOT EXISTS update_proces_trigger AFTER UPDATE ON processes
- FOR EACH ROW
- BEGIN
- UPDATE processes SET last_update=(strftime('%s','now'))
- WHERE id=old.id;
- END;"
- #f))))))
-
-;; open databases, do initial sync
-(define (ulexdb-sync dbconndat udata)
- #f)
-
-
-) ;; END OF ULEX
-
-
-;;; ;;======================================================================
-;;; ;; D E B U G H E L P E R S
-;;; ;;======================================================================
-;;;
-;;; (define (dbg> . args)
-;;; (with-output-to-port (current-error-port)
-;;; (lambda ()
-;;; (apply print "dbg> " args))))
-;;;
-;;; (define (debug-pp . args)
-;;; (if (get-environment-variable "ULEX_DEBUG")
-;;; (with-output-to-port (current-error-port)
-;;; (lambda ()
-;;; (apply pp args)))))
-;;;
-;;; (define *default-debug-port* (current-error-port))
-;;;
-;;; (define (sdbg> fn stage-name stage-start stage-end start-time . message)
-;;; (if (get-environment-variable "ULEX_DEBUG")
-;;; (with-output-to-port *default-debug-port*
-;;; (lambda ()
-;;; (apply print "ulex:" fn " " stage-name " took " (- (if stage-end stage-end (current-milliseconds)) stage-start) " ms. "
-;;; (if start-time
-;;; (conc "total time " (- (current-milliseconds) start-time)
-;;; " ms.")
-;;; "")
-;;; message
-;;; )))))
-
-;;======================================================================
-;; M A C R O S
-;;======================================================================
-;; iup callbacks are not dumping the stack, this is a work-around
-;;
-
-;; Some of these routines use:
-;;
-;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html
-;;
-;; Syntax for defining macros in a simple style similar to function definiton,
-;; when there is a single pattern for the argument list and there are no keywords.
-;;
-;; (define-simple-syntax (name arg ...) body ...)
-;;
-;;
-;; (define-syntax define-simple-syntax
-;; (syntax-rules ()
-;; ((_ (name arg ...) body ...)
-;; (define-syntax name (syntax-rules () ((name arg ...) (begin body ...)))))))
-;;
-;; (define-simple-syntax (catch-and-dump proc procname)
-;; (handle-exceptions
-;; exn
-;; (begin
-;; (print-call-chain (current-error-port))
-;; (with-output-to-port (current-error-port)
-;; (lambda ()
-;; (print ((condition-property-accessor 'exn 'message) exn))
-;; (print "Callback error in " procname)
-;; (print "Full condition info:\n" (condition->list exn)))))
-;; (proc)))
-;;
-;;
-;;======================================================================
-;; R E C O R D S
-;;======================================================================
-
-;;; ;; information about me as a server
-;;; ;;
-;;; (defstruct area
-;;; ;; about this area
-;;; (useportlogger #f)
-;;; (lowport 32768)
-;;; (server-type 'auto) ;; auto=create up to five servers/pkts, main=create pkts, passive=no pkt (unless there are no pkts at all)
-;;; (conn #f)
-;;; (port #f)
-;;; (myaddr (get-my-best-address))
-;;; pktid ;; get pkt from hosts table if needed
-;;; pktfile
-;;; pktsdir
-;;; dbdir
-;;; (dbhandles (make-hash-table)) ;; fname => list-of-dbh, NOTE: Should really never need more than one?
-;;; (mutex (make-mutex))
-;;; (rtable (make-hash-table)) ;; registration table of available actions
-;;; (dbs (make-hash-table)) ;; filename => random number, used for choosing what dbs I serve
-;;; ;; about other servers
-;;; (hosts (make-hash-table)) ;; key => hostdat
-;;; (hoststats (make-hash-table)) ;; key => alist of fname => ( qcount . qtime )
-;;; (reqs (make-hash-table)) ;; uri => queue
-;;; ;; work queues
-;;; (wqueues (make-hash-table)) ;; fname => qdat
-;;; (stats (make-hash-table)) ;; fname => totalqueries
-;;; (last-srvup (current-seconds)) ;; last time we updated the known servers
-;;; (cookie2mbox (make-hash-table)) ;; map cookie for outstanding request to mailbox of awaiting call
-;;; (ready #f)
-;;; (health (make-hash-table)) ;; ipaddr:port => num failed pings since last good ping
-;;; )
-;;;
-;;; ;; host stats
-;;; ;;
-;;; (defstruct hostdat
-;;; (pkt #f)
-;;; (dbload (make-hash-table)) ;; "dbfile.db" => queries/min
-;;; (hostload #f) ;; normalized load ( 5min load / numcpus )
-;;; )
-;;;
-;;; ;; dbdat
-;;; ;;
-;;; (defstruct dbdat
-;;; (dbh #f)
-;;; (fname #f)
-;;; (write-access #f)
-;;; (sths (make-hash-table)) ;; hash mapping query strings to handles
-;;; )
-;;;
-;;; ;; qdat
-;;; ;;
-;;; (defstruct qdat
-;;; (writeq (make-queue))
-;;; (readq (make-queue))
-;;; (rwq (make-queue))
-;;; (logq (make-queue)) ;; do we need a queue for logging? yes, if we use sqlite3 db for logging
-;;; (osshort (make-queue))
-;;; (oslong (make-queue))
-;;; (misc (make-queue)) ;; used for things like ping-full
-;;; )
-;;;
-;;; ;; calldat
-;;; ;;
-;;; (defstruct calldat
-;;; (ctype 'dbwrite)
-;;; (obj #f) ;; this would normally be an SQL statement e.g. SELECT, INSERT etc.
-;;; (rtime (current-milliseconds)))
-;;;
-;;; ;; make it a global? Well, it is local to area module
-;;;
-;;; (define *pktspec*
-;;; `((server (hostname . h)
-;;; (port . p)
-;;; (pid . i)
-;;; (ipaddr . a)
-;;; )
-;;; (data (hostname . h) ;; sender hostname
-;;; (port . p) ;; sender port
-;;; (ipaddr . a) ;; sender ip
-;;; (hostkey . k) ;; sending host key - store info at server under this key
-;;; (servkey . s) ;; server key - this needs to match at server end or reject the msg
-;;; (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json
-;;; (data . d) ;; base64 encoded slln data
-;;; )))
-;;;
-;;; ;; work item
-;;; ;;
-;;; (defstruct witem
-;;; (rhost #f) ;; return host
-;;; (ripaddr #f) ;; return ipaddr
-;;; (rport #f) ;; return port
-;;; (servkey #f) ;; the packet representing the client of this workitem, used by final send-message
-;;; (rdat #f) ;; the request - usually an sql query, type is rdat
-;;; (action #f) ;; the action: immediate, dbwrite, dbread,oslong, osshort
-;;; (cookie #f) ;; cookie id for response
-;;; (data #f) ;; the data payload, i.e. parameters
-;;; (result #f) ;; the result from processing the data
-;;; (caller #f)) ;; the calling peer according to rpc itself
-;;;
-;;; (define (trim-pktid pktid)
-;;; (if (string? pktid)
-;;; (substring pktid 0 4)
-;;; "nopkt"))
-;;;
-;;; (define (any->number num)
-;;; (cond
-;;; ((number? num) num)
-;;; ((string? num) (string->number num))
-;;; (else num)))
-;;;
-;;; (use trace)
-;;; (trace-call-sites #t)
-;;;
-;;; ;;======================================================================
-;;; ;; D A T A B A S E H A N D L I N G
-;;; ;;======================================================================
-;;;
-;;; ;; look in dbhandles for a db, return it, else return #f
-;;; ;;
-;;; (define (get-dbh acfg fname)
-;;; (let ((dbh-lst (hash-table-ref/default (area-dbhandles acfg) fname '())))
-;;; (if (null? dbh-lst)
-;;; (begin
-;;; ;; (print "opening db for " fname)
-;;; (open-db acfg fname)) ;; Note that the handles get put back in the queue in the save-dbh calls
-;;; (let ((rem-lst (cdr dbh-lst)))
-;;; ;; (print "re-using saved connection for " fname)
-;;; (hash-table-set! (area-dbhandles acfg) fname rem-lst)
-;;; (car dbh-lst)))))
-;;;
-;;; (define (save-dbh acfg fname dbdat)
-;;; ;; (print "saving dbh for " fname)
-;;; (hash-table-set! (area-dbhandles acfg) fname (cons dbdat (hash-table-ref/default (area-dbhandles acfg) fname '()))))
-;;;
-;;; ;; open the database, if never before opened init it. put the handle in the
-;;; ;; open db's hash table
-;;; ;; returns: the dbdat
-;;; ;;
-;;; (define (open-db acfg fname)
-;;; (let* ((fullname (conc (area-dbdir acfg) "/" fname))
-;;; (exists (file-exists? fullname))
-;;; (write-access (if exists
-;;; (file-write-access? fullname)
-;;; (file-write-access? (area-dbdir acfg))))
-;;; (db (sqlite3:open-database fullname))
-;;; (handler (sqlite3:make-busy-timeout 136000))
-;;; )
-;;; (sqlite3:set-busy-handler! db handler)
-;;; (sqlite3:execute db "PRAGMA synchronous = 0;")
-;;; (if (not exists) ;; need to init the db
-;;; (if write-access
-;;; (let ((isql (get-rsql acfg 'dbinitsql))) ;; get the init sql statements
-;;; ;; (sqlite3:with-transaction
-;;; ;; db
-;;; ;; (lambda ()
-;;; (if isql
-;;; (for-each
-;;; (lambda (sql)
-;;; (sqlite3:execute db sql))
-;;; isql)))
-;;; (print "ERROR: no write access to " (area-dbdir acfg))))
-;;; (make-dbdat dbh: db fname: fname write-access: write-access)))
-;;;
-;;; ;; This is a low-level command to retrieve or to prepare, save and return a prepared statment
-;;; ;; you must extract the db handle
-;;; ;;
-;;; (define (get-sth db cache stmt)
-;;; (if (hash-table-exists? cache stmt)
-;;; (begin
-;;; ;; (print "Reusing cached stmt for " stmt)
-;;; (hash-table-ref/default cache stmt #f))
-;;; (let ((sth (sqlite3:prepare db stmt)))
-;;; (hash-table-set! cache stmt sth)
-;;; ;; (print "prepared stmt for " stmt)
-;;; sth)))
-;;;
-;;; ;; a little more expensive but does all the tedious deferencing - only use if you don't already
-;;; ;; have dbdat and db sitting around
-;;; ;;
-;;; (define (full-get-sth acfg fname stmt)
-;;; (let* ((dbdat (get-dbh acfg fname))
-;;; (db (dbdat-dbh dbdat))
-;;; (sths (dbdat-sths dbdat)))
-;;; (get-sth db sths stmt)))
-;;;
-;;; ;; write to a db
-;;; ;; acfg: area data
-;;; ;; rdat: request data
-;;; ;; hdat: (host . port)
-;;; ;;
-;;; ;; (define (dbwrite acfg rdat hdat data-in)
-;;; ;; (let* ((dbname (car data-in))
-;;; ;; (dbdat (get-dbh acfg dbname))
-;;; ;; (db (dbdat-dbh dbdat))
-;;; ;; (sths (dbdat-sths dbdat))
-;;; ;; (stmt (calldat-obj rdat))
-;;; ;; (sth (get-sth db sths stmt))
-;;; ;; (data (cdr data-in)))
-;;; ;; (print "dbname: " dbname " acfg: " acfg " rdat: " (calldat->alist rdat) " hdat: " hdat " data: " data)
-;;; ;; (print "dbdat: " (dbdat->alist dbdat))
-;;; ;; (apply sqlite3:execute sth data)
-;;; ;; (save-dbh acfg dbname dbdat)
-;;; ;; #t
-;;; ;; ))
-;;;
-;;; (define (finalize-all-db-handles acfg)
-;;; (let* ((dbhandles (area-dbhandles acfg)) ;; dbhandles is hash of fname ==> dbdat
-;;; (num 0))
-;;; (for-each
-;;; (lambda (area-name)
-;;; (print "Closing handles for " area-name)
-;;; (let ((dbdats (hash-table-ref/default dbhandles area-name '())))
-;;; (for-each
-;;; (lambda (dbdat)
-;;; ;; first close all statement handles
-;;; (for-each
-;;; (lambda (sth)
-;;; (sqlite3:finalize! sth)
-;;; (set! num (+ num 1)))
-;;; (hash-table-values (dbdat-sths dbdat)))
-;;; ;; now close the dbh
-;;; (set! num (+ num 1))
-;;; (sqlite3:finalize! (dbdat-dbh dbdat)))
-;;; dbdats)))
-;;; (hash-table-keys dbhandles))
-;;; (print "FINALIZED " num " dbhandles")))
-;;;
-;;; ;;======================================================================
-;;; ;; W O R K Q U E U E H A N D L I N G
-;;; ;;======================================================================
-;;;
-;;; (define (register-db-as-mine acfg dbname)
-;;; (let ((ht (area-dbs acfg)))
-;;; (if (not (hash-table-ref/default ht dbname #f))
-;;; (hash-table-set! ht dbname (random 10000)))))
-;;;
-;;; (define (work-queue-add acfg fname witem)
-;;; (let* ((work-queue-start (current-milliseconds))
-;;; (action (witem-action witem)) ;; NB the action is the index into the rdat actions
-;;; (qdat (or (hash-table-ref/default (area-wqueues acfg) fname #f)
-;;; (let ((newqdat (make-qdat)))
-;;; (hash-table-set! (area-wqueues acfg) fname newqdat)
-;;; newqdat)))
-;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f)))
-;;; (if rdat
-;;; (queue-add!
-;;; (case (calldat-ctype rdat)
-;;; ((dbwrite) (register-db-as-mine acfg fname)(qdat-writeq qdat))
-;;; ((dbread) (register-db-as-mine acfg fname)(qdat-readq qdat))
-;;; ((dbrw) (register-db-as-mine acfg fname)(qdat-rwq qdat))
-;;; ((oslong) (qdat-oslong qdat))
-;;; ((osshort) (qdat-osshort qdat))
-;;; ((full-ping) (qdat-misc qdat))
-;;; (else
-;;; (print "ERROR: no queue for " action ". Adding to dbwrite queue.")
-;;; (qdat-writeq qdat)))
-;;; witem)
-;;; (case action
-;;; ((full-ping)(qdat-misc qdat))
-;;; (else
-;;; (print "ERROR: No action " action " was registered"))))
-;;; (sdbg> "work-queue-add" "queue-add" work-queue-start #f #f)
-;;; #t)) ;; for now, simply return #t to indicate request got to the queue
-;;;
-;;; (define (doqueue acfg q fname dbdat dbh)
-;;; ;; (print "doqueue: " fname)
-;;; (let* ((start-time (current-milliseconds))
-;;; (qlen (queue-length q)))
-;;; (if (> qlen 1)
-;;; (print "Processing queue of length " qlen))
-;;; (let loop ((count 0)
-;;; (responses '()))
-;;; (let ((delta (- (current-milliseconds) start-time)))
-;;; (if (or (queue-empty? q)
-;;; (> delta 400)) ;; stop working on this queue after 400ms have passed
-;;; (list count delta responses) ;; return count, delta and responses list
-;;; (let* ((witem (queue-remove! q))
-;;; (action (witem-action witem))
-;;; (rdat (witem-rdat witem))
-;;; (stmt (calldat-obj rdat))
-;;; (sth (full-get-sth acfg fname stmt))
-;;; (ctype (calldat-ctype rdat))
-;;; (data (witem-data witem))
-;;; (cookie (witem-cookie witem)))
-;;; ;; do the processing and save the result in witem-result
-;;; (witem-result-set!
-;;; witem
-;;; (case ctype ;; action
-;;; ((noblockwrite) ;; blind write, no ack of success returned
-;;; (apply sqlite3:execute sth data)
-;;; (sqlite3:last-insert-rowid dbh))
-;;; ((dbwrite) ;; blocking write
-;;; (apply sqlite3:execute sth data)
-;;; #t)
-;;; ((dbread) ;; TODO: consider breaking this up and shipping in pieces for large query
-;;; (apply sqlite3:map-row (lambda x x) sth data))
-;;; ((full-ping) 'full-ping)
-;;; (else (print "Not ready for action " action) #f)))
-;;; (loop (add1 count)
-;;; (if cookie
-;;; (cons witem responses)
-;;; responses))))))))
-;;;
-;;; ;; do up to 400ms of processing on each queue
-;;; ;; - the work-queue-processor will allow the max 1200ms of work to complete but it will flag as overloaded
-;;; ;;
-;;; (define (process-db-queries acfg fname)
-;;; (if (hash-table-exists? (area-wqueues acfg) fname)
-;;; (let* ((process-db-queries-start-time (current-milliseconds))
-;;; (qdat (hash-table-ref/default (area-wqueues acfg) fname #f))
-;;; (queue-sym->queue (lambda (queue-sym)
-;;; (case queue-sym ;; lookup the queue from qdat given a name (symbol)
-;;; ((wqueue) (qdat-writeq qdat))
-;;; ((rqueue) (qdat-readq qdat))
-;;; ((rwqueue) (qdat-rwq qdat))
-;;; ((misc) (qdat-misc qdat))
-;;; (else #f))))
-;;; (dbdat (get-dbh acfg fname))
-;;; (dbh (if (dbdat? dbdat)(dbdat-dbh dbdat) #f))
-;;; (nowtime (current-seconds)))
-;;; ;; handle the queues that require a transaction
-;;; ;;
-;;; (map ;;
-;;; (lambda (queue-sym)
-;;; ;; (print "processing queue " queue-sym)
-;;; (let* ((queue (queue-sym->queue queue-sym)))
-;;; (if (not (queue-empty? queue))
-;;; (let ((responses
-;;; (sqlite3:with-transaction ;; todo - catch exceptions...
-;;; dbh
-;;; (lambda ()
-;;; (let* ((res (doqueue acfg queue fname dbdat dbh))) ;; this does the work!
-;;; ;; (print "res=" res)
-;;; (match res
-;;; ((count delta responses)
-;;; (update-stats acfg fname queue-sym delta count)
-;;; (sdbg> "process-db-queries" "sqlite3-transaction" process-db-queries-start-time #f #f)
-;;; responses) ;; return responses
-;;; (else
-;;; (print "ERROR: bad return data from doqueue " res)))
-;;; )))))
-;;; ;; having completed the transaction, send the responses.
-;;; ;; (print "INFO: sending " (length responses) " responses.")
-;;; (let loop ((responses-left responses))
-;;; (cond
-;;; ((null? responses-left) #t)
-;;; (else
-;;; (let* ((witem (car responses-left))
-;;; (response (cdr responses-left)))
-;;; (call-deliver-response acfg (witem-ripaddr witem)(witem-rport witem)
-;;; (witem-cookie witem)(witem-result witem)))
-;;; (loop (cdr responses-left))))))
-;;; )))
-;;; '(wqueue rwqueue rqueue))
-;;;
-;;; ;; handle misc queue
-;;; ;;
-;;; ;; (print "processing misc queue")
-;;; (let ((queue (queue-sym->queue 'misc)))
-;;; (doqueue acfg queue fname dbdat dbh))
-;;; ;; ....
-;;; (save-dbh acfg fname dbdat)
-;;; #t ;; just to let the tests know we got here
-;;; )
-;;; #f ;; nothing processed
-;;; ))
-;;;
-;;; ;; run all queues in parallel per db but sequentially per queue for that db.
-;;; ;; - process the queues every 500 or so ms
-;;; ;; - allow for long running queries to continue but all other activities for that
-;;; ;; db will be blocked.
-;;; ;;
-;;; (define (work-queue-processor acfg)
-;;; (let* ((threads (make-hash-table))) ;; fname => thread
-;;; (let loop ((fnames (hash-table-keys (area-wqueues acfg)))
-;;; (target-time (+ (current-milliseconds) 50)))
-;;; ;;(if (not (null? fnames))(print "Processing for these databases: " fnames))
-;;; (for-each
-;;; (lambda (fname)
-;;; ;; (print "processing for " fname)
-;;; ;;(process-db-queries acfg fname))
-;;; (let ((th (hash-table-ref/default threads fname #f)))
-;;; (if (and th (not (member (thread-state th) '(dead terminated))))
-;;; (begin
-;;; (print "WARNING: worker thread for " fname " is taking a long time.")
-;;; (print "Thread is in state " (thread-state th)))
-;;; (let ((th1 (make-thread (lambda ()
-;;; ;; (catch-and-dump
-;;; ;; (lambda ()
-;;; ;; (print "Process queries for " fname)
-;;; (let ((start-time (current-milliseconds)))
-;;; (process-db-queries acfg fname)
-;;; ;; (thread-sleep! 0.01) ;; need the thread to take at least some time
-;;; (hash-table-delete! threads fname)) ;; no mutexes?
-;;; fname)
-;;; "th1"))) ;; ))
-;;; (hash-table-set! threads fname th1)
-;;; (thread-start! th1)))))
-;;; fnames)
-;;; ;; (thread-sleep! 0.1) ;; give the threads some time to process requests
-;;; ;; burn time until 400ms is up
-;;; (let ((now-time (current-milliseconds)))
-;;; (if (< now-time target-time)
-;;; (let ((delta (- target-time now-time)))
-;;; (thread-sleep! (/ delta 1000)))))
-;;; (loop (hash-table-keys (area-wqueues acfg))
-;;; (+ (current-milliseconds) 50)))))
-;;;
-;;; ;;======================================================================
-;;; ;; S T A T S G A T H E R I N G
-;;; ;;======================================================================
-;;;
-;;; (defstruct stat
-;;; (qcount-avg 0) ;; coarse running average
-;;; (qtime-avg 0) ;; coarse running average
-;;; (qcount 0) ;; total
-;;; (qtime 0) ;; total
-;;; (last-qcount 0) ;; last
-;;; (last-qtime 0) ;; last
-;;; (dbs '()) ;; list of db files handled by this node
-;;; (when 0)) ;; when the last query happened - seconds
-;;;
-;;;
-;;; (define (update-stats acfg fname bucket duration numqueries)
-;;; (let* ((key fname) ;; for now do not use bucket. Was: (conc fname "-" bucket)) ;; lazy but good enough
-;;; (stats (or (hash-table-ref/default (area-stats acfg) key #f)
-;;; (let ((newstats (make-stat)))
-;;; (hash-table-set! (area-stats acfg) key newstats)
-;;; newstats))))
-;;; ;; when the last query happended (used to remove the fname from the active list)
-;;; (stat-when-set! stats (current-seconds))
-;;; ;; last values
-;;; (stat-last-qcount-set! stats numqueries)
-;;; (stat-last-qtime-set! stats duration)
-;;; ;; total over process lifetime
-;;; (stat-qcount-set! stats (+ (stat-qcount stats) numqueries))
-;;; (stat-qtime-set! stats (+ (stat-qtime stats) duration))
-;;; ;; coarse average
-;;; (stat-qcount-avg-set! stats (/ (+ (stat-qcount-avg stats) numqueries) 2))
-;;; (stat-qtime-avg-set! stats (/ (+ (stat-qtime-avg stats) duration) 2))
-;;;
-;;; ;; here is where we add the stats for a given dbfile
-;;; (if (not (member fname (stat-dbs stats)))
-;;; (stat-dbs-set! stats (cons fname (stat-dbs stats))))
-;;;
-;;; ))
-;;;
-;;; ;;======================================================================
-;;; ;; S E R V E R S T U F F
-;;; ;;======================================================================
-;;;
-;;; ;; this does NOT return!
-;;; ;;
-;;; (define (find-free-port-and-open acfg)
-;;; (let ((port (or (area-port acfg) 3200)))
-;;; (handle-exceptions
-;;; exn
-;;; (begin
-;;; (print "INFO: cannot bind to port " (rpc:default-server-port) ", trying next port")
-;;; (area-port-set! acfg (+ port 1))
-;;; (find-free-port-and-open acfg))
-;;; (rpc:default-server-port port)
-;;; (area-port-set! acfg port)
-;;; (tcp-read-timeout 120000)
-;;; ;; ((rpc:make-server (tcp-listen port)) #t)
-;;; (tcp-listen (rpc:default-server-port)
-;;; ))))
-;;;
-;;; ;; register this node by putting a packet into the pkts dir.
-;;; ;; look for other servers
-;;; ;; contact other servers and compile list of servers
-;;; ;; there are two types of server
-;;; ;; main servers - dashboards, runners and dedicated servers - need pkt
-;;; ;; passive servers - test executers, step calls, list-runs - no pkt
-;;; ;;
-;;; (define (register-node acfg hostip port-num)
-;;; ;;(mutex-lock! (area-mutex acfg))
-;;; (let* ((server-type (area-server-type acfg)) ;; auto, main, passive (no pkt created)
-;;; (best-ip (or hostip (get-my-best-address)))
-;;; (mtdir (area-dbdir acfg))
-;;; (pktdir (area-pktsdir acfg))) ;; conc mtdir "/.server-pkts")))
-;;; (print "Registering node " best-ip ":" port-num)
-;;; (if (not mtdir) ;; require a home for this node to put or find databases
-;;; #f
-;;; (begin
-;;; (if (not (directory? pktdir))(create-directory pktdir))
-;;; ;; server is started, now create pkt if needed
-;;; (print "Starting server in " server-type " mode with port " port-num)
-;;; (if (member server-type '(auto main)) ;; TODO: if auto, count number of servers registers, if > 3 then don't put out a pkt
-;;; (begin
-;;; (area-pktid-set! acfg
-;;; (write-alist->pkt
-;;; pktdir
-;;; `((hostname . ,(get-host-name))
-;;; (ipaddr . ,best-ip)
-;;; (port . ,port-num)
-;;; (pid . ,(current-process-id)))
-;;; pktspec: *pktspec*
-;;; ptype: 'server))
-;;; (area-pktfile-set! acfg (conc pktdir "/" (area-pktid acfg) ".pkt"))))
-;;; (area-port-set! acfg port-num)
-;;; #;(mutex-unlock! (area-mutex acfg))))))
-;;;
-;;; (define *cookie-seqnum* 0)
-;;; (define (make-cookie key)
-;;; (set! *cookie-seqnum* (add1 *cookie-seqnum*))
-;;; ;;(print "MAKE COOKIE CALLED -- on "servkey"-"*cookie-seqnum*)
-;;; (conc key "-" *cookie-seqnum*)
-;;; )
-;;;
-;;; ;; dispatch locally if possible
-;;; ;;
-;;; (define (call-deliver-response acfg ipaddr port cookie data)
-;;; (if (and (equal? (area-myaddr acfg) ipaddr)
-;;; (equal? (area-port acfg) port))
-;;; (deliver-response acfg cookie data)
-;;; ((rpc:procedure 'response ipaddr port) cookie data)))
-;;;
-;;; (define (deliver-response acfg cookie data)
-;;; (let ((deliver-response-start (current-milliseconds)))
-;;; (thread-start! (make-thread
-;;; (lambda ()
-;;; (let loop ((tries-left 5))
-;;; ;;(print "TOP OF DELIVER_RESPONSE LOOP; triesleft="tries-left)
-;;; ;;(pp (hash-table->alist (area-cookie2mbox acfg)))
-;;; (let* ((mbox (hash-table-ref/default (area-cookie2mbox acfg) cookie #f)))
-;;; (cond
-;;; ((eq? 0 tries-left)
-;;; (print "ulex:deliver-response: I give up. Mailbox never appeared. cookie="cookie)
-;;; )
-;;; (mbox
-;;; ;;(print "got mbox="mbox" got data="data" send.")
-;;; (mailbox-send! mbox data))
-;;; (else
-;;; ;;(print "no mbox yet. look for "cookie)
-;;; (thread-sleep! (/ (- 6 tries-left) 10))
-;;; (loop (sub1 tries-left))))))
-;;; ;; (debug-pp (list (conc "ulex:deliver-response took " (- (current-milliseconds) deliver-response-start) " ms, cookie=" cookie " data=") data))
-;;; (sdbg> "deliver-response" "mailbox-send" deliver-response-start #f #f cookie)
-;;; )
-;;; (conc "deliver-response thread for cookie="cookie))))
-;;; #t)
-;;;
-;;; ;; action:
-;;; ;; immediate - quick actions, no need to put in queues
-;;; ;; dbwrite - put in dbwrite queue
-;;; ;; dbread - put in dbread queue
-;;; ;; oslong - os actions, e.g. du, that could take a long time
-;;; ;; osshort - os actions that should be quick, e.g. df
-;;; ;;
-;;; (define (request acfg from-ipaddr from-port servkey action cookie fname params) ;; std-peer-handler
-;;; ;; NOTE: Use rpc:current-peer for getting return address
-;;; (let* ((std-peer-handler-start (current-milliseconds))
-;;; ;; (raw-data (alist-ref 'data dat))
-;;; (rdat (hash-table-ref/default
-;;; (area-rtable acfg) action #f)) ;; this looks up the sql query or other details indexed by the action
-;;; (witem (make-witem ripaddr: from-ipaddr ;; rhost: from-host
-;;; rport: from-port action: action
-;;; rdat: rdat cookie: cookie
-;;; servkey: servkey data: params ;; TODO - rename data to params
-;;; caller: (rpc:current-peer))))
-;;; (if (not (equal? servkey (area-pktid acfg)))
-;;; `(#f . ,(conc "I don't know you servkey=" servkey ", pktid=" (area-pktid acfg))) ;; immediately return this
-;;; (let* ((ctype (if rdat
-;;; (calldat-ctype rdat) ;; is this necessary? these should be identical
-;;; action)))
-;;; (sdbg> "std-peer-handler" "immediate" std-peer-handler-start #f #f)
-;;; (case ctype
-;;; ;; (dbwrite acfg rdat (cons from-ipaddr from-port) data)))
-;;; ((full-ping) `(#t "ack to full ping" ,(work-queue-add acfg fname witem) ,cookie))
-;;; ((response) `(#t "ack from requestor" ,(deliver-response acfg fname params)))
-;;; ((dbwrite) `(#t "db write submitted" ,(work-queue-add acfg fname witem) ,cookie))
-;;; ((dbread) `(#t "db read submitted" ,(work-queue-add acfg fname witem) ,cookie ))
-;;; ((dbrw) `(#t "db read/write submitted" ,cookie))
-;;; ((osshort) `(#t "os short submitted" ,cookie))
-;;; ((oslong) `(#t "os long submitted" ,cookie))
-;;; (else `(#f "unrecognised action" ,ctype)))))))
-;;;
-;;; ;; Call this to start the actual server
-;;; ;;
-;;; ;; start_server
-;;; ;;
-;;; ;; mode: '
-;;; ;; handler: proc which takes pktrecieved as argument
-;;; ;;
-;;;
-;;; (define (start-server acfg)
-;;; (let* ((conn (find-free-port-and-open acfg))
-;;; (port (area-port acfg)))
-;;; (rpc:publish-procedure!
-;;; 'delist-db
-;;; (lambda (fname)
-;;; (hash-table-delete! (area-dbs acfg) fname)))
-;;; (rpc:publish-procedure!
-;;; 'calling-addr
-;;; (lambda ()
-;;; (rpc:current-peer)))
-;;; (rpc:publish-procedure!
-;;; 'ping
-;;; (lambda ()(real-ping acfg)))
-;;; (rpc:publish-procedure!
-;;; 'request
-;;; (lambda (from-addr from-port servkey action cookie dbname params)
-;;; (request acfg from-addr from-port servkey action cookie dbname params)))
-;;; (rpc:publish-procedure!
-;;; 'response
-;;; (lambda (cookie res-dat)
-;;; (deliver-response acfg cookie res-dat)))
-;;; (area-ready-set! acfg #t)
-;;; (area-conn-set! acfg conn)
-;;; ((rpc:make-server conn) #f)));; ((tcp-listen (rpc:default-server-port)) #t)
-;;;
-;;;
-;;; (define (launch acfg) ;; #!optional (proc std-peer-handler))
-;;; (print "starting launch")
-;;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers)
-;;; #;(let ((original-handler (current-exception-handler))) ;; is th
-;;; (lambda (exception)
-;;; (server-exit-procedure)
-;;; (original-handler exception)))
-;;; (on-exit (lambda ()
-;;; (shutdown acfg))) ;; (finalize-all-db-handles acfg)))
-;;; ;; set up the rpc handler
-;;; (let* ((th1 (make-thread
-;;; (lambda ()(start-server acfg))
-;;; "server thread"))
-;;; (th2 (make-thread
-;;; (lambda ()
-;;; (print "th2 starting")
-;;; (let loop ()
-;;; (work-queue-processor acfg)
-;;; (print "work-queue-processor crashed!")
-;;; (loop)))
-;;; "work queue thread")))
-;;; (thread-start! th1)
-;;; (thread-start! th2)
-;;; (let loop ()
-;;; (thread-sleep! 0.025)
-;;; (if (area-ready acfg)
-;;; #t
-;;; (loop)))
-;;; ;; attempt to fix my address
-;;; (let* ((all-addr (get-all-ips-sorted))) ;; could use (tcp-addresses conn)?
-;;; (let loop ((rem-addrs all-addr))
-;;; (if (null? rem-addrs)
-;;; (begin
-;;; (print "ERROR: Failed to figure out the ip address of myself as a server. Giving up.")
-;;; (exit 1)) ;; BUG Changeme to raising an exception
-;;;
-;;; (let* ((addr (car rem-addrs))
-;;; (good-addr (handle-exceptions
-;;; exn
-;;; #f
-;;; ((rpc:procedure 'calling-addr addr (area-port acfg))))))
-;;; (if good-addr
-;;; (begin
-;;; (print "Got good-addr of " good-addr)
-;;; (area-myaddr-set! acfg good-addr))
-;;; (loop (cdr rem-addrs)))))))
-;;; (register-node acfg (area-myaddr acfg)(area-port acfg))
-;;; (print "INFO: Server started on " (area-myaddr acfg) ":" (area-port acfg))
-;;; ;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers)
-;;; ))
-;;;
-;;; (define (clear-server-pkt acfg)
-;;; (let ((pktf (area-pktfile acfg)))
-;;; (if pktf (delete-file* pktf))))
-;;;
-;;; (define (shutdown acfg)
-;;; (let (;;(conn (area-conn acfg))
-;;; (pktf (area-pktfile acfg))
-;;; (port (area-port acfg)))
-;;; (if pktf (delete-file* pktf))
-;;; (send-all "imshuttingdown")
-;;; ;; (rpc:close-all-connections!) ;; don't know if this is actually needed
-;;; (finalize-all-db-handles acfg)))
-;;;
-;;; (define (send-all msg)
-;;; #f)
-;;;
-;;; ;; given a area record look up all the packets
-;;; ;;
-;;; (define (get-all-server-pkts acfg)
-;;; (let ((all-pkt-files (glob (conc (area-pktsdir acfg) "/*.pkt"))))
-;;; (map (lambda (pkt-file)
-;;; (read-pkt->alist pkt-file pktspec: *pktspec*))
-;;; all-pkt-files)))
-;;;
-;;; #;((Z . "9a0212302295a19610d5796fce0370fa130758e9")
-;;; (port . "34827")
-;;; (pid . "28748")
-;;; (hostname . "zeus")
-;;; (T . "server")
-;;; (D . "1549427032.0"))
-;;;
-;;; #;(define (get-my-best-address)
-;;; (let ((all-my-addresses (get-all-ips))) ;; (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))))
-;;; (cond
-;;; ((null? all-my-addresses)
-;;; (get-host-name)) ;; no interfaces?
-;;; ((eq? (length all-my-addresses) 1)
-;;; (ip->string (car all-my-addresses))) ;; only one to choose from, just go with it
-;;; (else
-;;; (ip->string (car (filter (lambda (x) ;; take any but 127.
-;;; (not (eq? (u8vector-ref x 0) 127)))
-;;; all-my-addresses)))))))
-;;;
-;;; ;; whoami? I am my pkt
-;;; ;;
-;;; (define (whoami? acfg)
-;;; (hash-table-ref/default (area-hosts acfg)(area-pktid acfg) #f))
-;;;
-;;; ;;======================================================================
-;;; ;; "Client side" operations
-;;; ;;======================================================================
-;;;
-;;; (define (safe-call call-key host port . params)
-;;; (handle-exceptions
-;;; exn
-;;; (begin
-;;; (print "Call " call-key " to " host ":" port " failed")
-;;; #f)
-;;; (apply (rpc:procedure call-key host port) params)))
-;;;
-;;; ;; ;; convert to/from string / sexpr
-;;; ;;
-;;; ;; (define (string->sexpr str)
-;;; ;; (if (string? str)
-;;; ;; (with-input-from-string str read)
-;;; ;; str))
-;;; ;;
-;;; ;; (define (sexpr->string s)
-;;; ;; (with-output-to-string (lambda ()(write s))))
-;;;
-;;; ;; is the server alive?
-;;; ;;
-;;; (define (ping acfg host port)
-;;; (let* ((myaddr (area-myaddr acfg))
-;;; (myport (area-port acfg))
-;;; (start-time (current-milliseconds))
-;;; (res (if (and (equal? myaddr host)
-;;; (equal? myport port))
-;;; (real-ping acfg)
-;;; ((rpc:procedure 'ping host port)))))
-;;; (cons (- (current-milliseconds) start-time)
-;;; res)))
-;;;
-;;; ;; returns ( ipaddr port alist-fname=>randnum )
-;;; (define (real-ping acfg)
-;;; `(,(area-myaddr acfg) ,(area-port acfg) ,(get-host-stats acfg)))
-;;;
-;;; ;; is the server alive AND the queues processing?
-;;; ;;
-;;; #;(define (full-ping acfg servpkt)
-;;; (let* ((start-time (current-milliseconds))
-;;; (res (send-message acfg servpkt '(full-ping) 'full-ping)))
-;;; (cons (- (current-milliseconds) start-time)
-;;; res))) ;; (equal? res "got ping"))))
-;;;
-;;;
-;;; ;; look up all pkts and get the server id (the hash), port, host/ip
-;;; ;; store this info in acfg
-;;; ;; return the number of responsive servers found
-;;; ;;
-;;; ;; DO NOT VERIFY THAT THE SERVER IS ALIVE HERE. This is called at times where the current server is not yet alive and cannot ping itself
-;;; ;;
-;;; (define (update-known-servers acfg)
-;;; ;; readll all pkts
-;;; ;; foreach pkt; if it isn't me ping the server; if alive, add to hosts hash, else rm the pkt
-;;; (let* ((start-time (current-milliseconds))
-;;; (all-pkts (delete-duplicates
-;;; (append (get-all-server-pkts acfg)
-;;; (hash-table-values (area-hosts acfg)))))
-;;; (hostshash (area-hosts acfg))
-;;; (my-id (area-pktid acfg))
-;;; (pktsdir (area-pktsdir acfg)) ;; needed to remove pkts from non-responsive servers
-;;; (numsrvs 0)
-;;; (delpkt (lambda (pktsdir sid)
-;;; (print "clearing out server " sid)
-;;; (delete-file* (conc pktsdir "/" sid ".pkt"))
-;;; (hash-table-delete! hostshash sid))))
-;;; (area-last-srvup-set! acfg (current-seconds))
-;;; (for-each
-;;; (lambda (servpkt)
-;;; (if (list? servpkt)
-;;; ;; (pp servpkt)
-;;; (let* ((shost (alist-ref 'ipaddr servpkt))
-;;; (sport (any->number (alist-ref 'port servpkt)))
-;;; (res (handle-exceptions
-;;; exn
-;;; (begin
-;;; ;; (print "INFO: bad server on " shost ":" sport)
-;;; #f)
-;;; (ping acfg shost sport)))
-;;; (sid (alist-ref 'Z servpkt)) ;; Z code is our name for the server
-;;; (url (conc shost ":" sport))
-;;; )
-;;; #;(if (or (not res)
-;;; (null? res))
-;;; (begin
-;;; (print "STRANGE: ping of " url " gave " res)))
-;;;
-;;; ;; (print "Got " res " from " shost ":" sport)
-;;; (match res
-;;; ((qduration . payload)
-;;; ;; (print "Server pkt:" (alist-ref 'ipaddr servpkt) ":" (alist-ref 'port servpkt)
-;;; ;; (if payload
-;;; ;; "Success" "Fail"))
-;;; (match payload
-;;; ((host port stats)
-;;; ;; (print "From " host ":" port " got stats: " stats)
-;;; (if (and host port stats)
-;;; (let ((url (conc host ":" port)))
-;;; (hash-table-set! hostshash sid servpkt)
-;;; ;; store based on host:port
-;;; (hash-table-set! (area-hoststats acfg) sid stats))
-;;; (print "missing data from the server, not sure what that means!"))
-;;; (set! numsrvs (+ numsrvs 1)))
-;;; (#f
-;;; (print "Removing pkt " sid " due to #f from server or failed ping")
-;;; (delpkt pktsdir sid))
-;;; (else
-;;; (print "Got ")(pp res)(print " from server ")(pp servpkt) " but response did not match (#f/#t . msg)")))
-;;; (else
-;;; ;; here we delete the pkt - can't reach the server, remove it
-;;; ;; however this logic is inadequate. we should mark the server as checked
-;;; ;; and not good, if it happens a second time - then remove the pkt
-;;; ;; or something similar. I.e. don't be too quick to assume the server is wedged or dead
-;;; ;; could be it is simply too busy to reply
-;;; (let ((bad-pings (hash-table-ref/default (area-health acfg) url 0)))
-;;; (if (> bad-pings 1) ;; two bad pings - remove pkt
-;;; (begin
-;;; (print "INFO: " bad-pings " bad responses from " url ", deleting pkt " sid)
-;;; (delpkt pktsdir sid))
-;;; (begin
-;;; (print "INFO: " bad-pings " bad responses from " shost ":" sport " not deleting pkt yet")
-;;; (hash-table-set! (area-health acfg)
-;;; url
-;;; (+ (hash-table-ref/default (area-health acfg) url 0) 1))
-;;; ))
-;;; ))))
-;;; ;; servpkt is not actually a pkt?
-;;; (begin
-;;; (print "Bad pkt " servpkt))))
-;;; all-pkts)
-;;; (sdbg> "update-known-servers" "end" start-time #f #f " found " numsrvs
-;;; " servers, pkts: " (map (lambda (p)
-;;; (alist-ref 'Z p))
-;;; all-pkts))
-;;; numsrvs))
-;;;
-;;; (defstruct srvstat
-;;; (numfiles 0) ;; number of db files handled by this server - subtract 1 for the db being currently looked at
-;;; (randnum #f) ;; tie breaker number assigned to by the server itself - applies only to the db under consideration
-;;; (pkt #f)) ;; the server pkt
-;;;
-;;; ;;(define (srv->srvstat srvpkt)
-;;;
-;;; ;; Get the server best for given dbname and key
-;;; ;;
-;;; ;; NOTE: key is not currently used. The key points to the kind of query, this may be useful for directing read-only queries.
-;;; ;;
-;;; (define (get-best-server acfg dbname key)
-;;; (let* (;; (servers (hash-table-values (area-hosts acfg)))
-;;; (servers (area-hosts acfg))
-;;; (skeys (sort (hash-table-keys servers) string>=?)) ;; a stable listing
-;;; (start-time (current-milliseconds))
-;;; (srvstats (make-hash-table)) ;; srvid => srvstat
-;;; (url (conc (area-myaddr acfg) ":" (area-port acfg))))
-;;; ;; (print "scores for " dbname ": " (map (lambda (k)(cons k (calc-server-score acfg dbname k))) skeys))
-;;; (if (null? skeys)
-;;; (if (> (update-known-servers acfg) 0)
-;;; (get-best-server acfg dbname key) ;; some risk of infinite loop here, TODO add try counter
-;;; (begin
-;;; (print "ERROR: no server found!") ;; since this process is also a server this should never happen
-;;; #f))
-;;; (begin
-;;; ;; (print "in get-best-server with skeys=" skeys)
-;;; (if (> (- (current-seconds) (area-last-srvup acfg)) 10)
-;;; (begin
-;;; (update-known-servers acfg)
-;;; (sdbg> "get-best-server" "update-known-servers" start-time #f #f)))
-;;;
-;;; ;; for each server look at the list of dbfiles, total number of dbs being handled
-;;; ;; and the rand number, save the best host
-;;; ;; also do a delist-db for each server dbfile not used
-;;; (let* ((best-server #f)
-;;; (servers-to-delist (make-hash-table)))
-;;; (for-each
-;;; (lambda (srvid)
-;;; (let* ((server (hash-table-ref/default servers srvid #f))
-;;; (stats (hash-table-ref/default (area-hoststats acfg) srvid '(()))))
-;;; ;; (print "stats: " stats)
-;;; (if server
-;;; (let* ((dbweights (car stats))
-;;; (srvload (length (filter (lambda (x)(not (equal? dbname (car x)))) dbweights)))
-;;; (dbrec (alist-ref dbname dbweights equal?)) ;; get the pair with fname . randscore
-;;; (randnum (if dbrec
-;;; dbrec ;; (cdr dbrec)
-;;; 0)))
-;;; (hash-table-set! srvstats srvid (make-srvstat numfiles: srvload randnum: randnum pkt: server))))))
-;;; skeys)
-;;;
-;;; (let* ((sorted (sort (hash-table-values srvstats)
-;;; (lambda (a b)
-;;; (let ((numfiles-a (srvstat-numfiles a))
-;;; (numfiles-b (srvstat-numfiles b))
-;;; (randnum-a (srvstat-randnum a))
-;;; (randnum-b (srvstat-randnum b)))
-;;; (if (< numfiles-a numfiles-b) ;; Note, I don't think adding an offset works here. Goal was only move file handling to a different server if it has 2 less
-;;; #t
-;;; (if (and (equal? numfiles-a numfiles-b)
-;;; (< randnum-a randnum-b))
-;;; #t
-;;; #f))))))
-;;; (best (if (null? sorted)
-;;; (begin
-;;; (print "ERROR: should never be null due to self as server.")
-;;; #f)
-;;; (srvstat-pkt (car sorted)))))
-;;; #;(print "SERVER(" url "): " dbname ": " (map (lambda (srv)
-;;; (let ((p (srvstat-pkt srv)))
-;;; (conc (alist-ref 'ipaddr p) ":" (alist-ref 'port p)
-;;; "(" (srvstat-numfiles srv)","(srvstat-randnum srv)")")))
-;;; sorted))
-;;; best))))))
-;;;
-;;; ;; send out an "I'm about to exit notice to all known servers"
-;;; ;;
-;;; (define (death-imminent acfg)
-;;; '())
-;;;
-;;; ;;======================================================================
-;;; ;; U L E X - T H E I N T E R E S T I N G S T U F F ! !
-;;; ;;======================================================================
-;;;
-;;; ;; register a handler
-;;; ;; NOTES:
-;;; ;; dbinitsql is reserved for a list of sql statements for initializing the db
-;;; ;; dbinitfn is reserved for a db init function, if exists called after dbinitsql
-;;; ;;
-;;; (define (register acfg key obj #!optional (ctype 'dbwrite))
-;;; (let ((ht (area-rtable acfg)))
-;;; (if (hash-table-exists? ht key)
-;;; (print "WARNING: redefinition of entry " key))
-;;; (hash-table-set! ht key (make-calldat obj: obj ctype: ctype))))
-;;;
-;;; ;; usage: register-batch acfg '((key1 . sql1) (key2 . sql2) ... )
-;;; ;; NB// obj is often an sql query
-;;; ;;
-;;; (define (register-batch acfg ctype data)
-;;; (let ((ht (area-rtable acfg)))
-;;; (map (lambda (dat)
-;;; (hash-table-set! ht (car dat)(make-calldat obj: (cdr dat) ctype: ctype)))
-;;; data)))
-;;;
-;;; (define (initialize-area-calls-from-specfile area specfile)
-;;; (let* ((callspec (with-input-from-file specfile read )))
-;;; (for-each (lambda (group)
-;;; (register-batch
-;;; area
-;;; (car group)
-;;; (cdr group)))
-;;; callspec)))
-;;;
-;;; ;; get-rentry
-;;; ;;
-;;; (define (get-rentry acfg key)
-;;; (hash-table-ref/default (area-rtable acfg) key #f))
-;;;
-;;; (define (get-rsql acfg key)
-;;; (let ((cdat (get-rentry acfg key)))
-;;; (if cdat
-;;; (calldat-obj cdat)
-;;; #f)))
-;;;
-;;;
-;;;
-;;; ;; blocking call:
-;;; ;; client server
-;;; ;; ------ ------
-;;; ;; call()
-;;; ;; send-message()
-;;; ;; nmsg-send()
-;;; ;; nmsg-receive()
-;;; ;; nmsg-respond(ack,cookie)
-;;; ;; ack, cookie
-;;; ;; mbox-thread-wait(cookie)
-;;; ;; nmsg-send(client,cookie,result)
-;;; ;; nmsg-respond(ack)
-;;; ;; return result
-;;; ;;
-;;; ;; reserved action:
-;;; ;; 'immediate
-;;; ;; 'dbinitsql
-;;; ;;
-;;; (define (call acfg dbname action params #!optional (count 0))
-;;; (let* ((call-start-time (current-milliseconds))
-;;; (srv (get-best-server acfg dbname action))
-;;; (post-get-start-time (current-milliseconds))
-;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f))
-;;; (myid (trim-pktid (area-pktid acfg)))
-;;; (srvid (trim-pktid (alist-ref 'Z srv)))
-;;; (cookie (make-cookie myid)))
-;;; (sdbg> "call" "get-best-server" call-start-time #f call-start-time " from: " myid " to server: " srvid " for " dbname " action: " action " params: " params " rdat: " rdat)
-;;; (print "INFO: call to " (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv) " from " (area-myaddr acfg) ":" (area-port acfg) " for " dbname)
-;;; (if (and srv rdat) ;; need both to dispatch a request
-;;; (let* ((ripaddr (alist-ref 'ipaddr srv))
-;;; (rsrvid (alist-ref 'Z srv))
-;;; (rport (any->number (alist-ref 'port srv)))
-;;; (res-full (if (and (equal? ripaddr (area-myaddr acfg))
-;;; (equal? rport (area-port acfg)))
-;;; (request acfg ripaddr rport (area-pktid acfg) action cookie dbname params)
-;;; (safe-call 'request ripaddr rport
-;;; (area-myaddr acfg)
-;;; (area-port acfg)
-;;; #;(area-pktid acfg)
-;;; rsrvid
-;;; action cookie dbname params))))
-;;; ;; (print "res-full: " res-full)
-;;; (match res-full
-;;; ((response-ok response-msg rem ...)
-;;; (let* ((send-message-time (current-milliseconds))
-;;; ;; (match res-full
-;;; ;; ((response-ok response-msg)
-;;; ;; (response-ok (car res-full))
-;;; ;; (response-msg (cadr res-full)
-;;; )
-;;; ;; (res (take res-full 3))) ;; ctype == action, TODO: converge on one term <<=== what was this? BUG
-;;; ;; (print "ulex:call: send-message took " (- send-message-time post-get-start-time) " ms params=" params)
-;;; (sdbg> "call" "send-message" post-get-start-time #f call-start-time)
-;;; (cond
-;;; ((not response-ok) #f)
-;;; ((member response-msg '("db read submitted" "db write submitted"))
-;;; (let* ((cookie-id (cadddr res-full))
-;;; (mbox (make-mailbox))
-;;; (mbox-time (current-milliseconds)))
-;;; (hash-table-set! (area-cookie2mbox acfg) cookie-id mbox)
-;;; (let* ((mbox-timeout-secs 20)
-;;; (mbox-timeout-result 'MBOX_TIMEOUT)
-;;; (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
-;;; (mbox-receive-time (current-milliseconds)))
-;;; (hash-table-delete! (area-cookie2mbox acfg) cookie-id)
-;;; (sdbg> "call" "mailbox-receive" mbox-time #f call-start-time " from: " myid " to server: " srvid " for " dbname)
-;;; ;; (print "ulex:call mailbox-receive took " (- mbox-receive-time mbox-time) "ms params=" params)
-;;; res)))
-;;; (else
-;;; (print "Unhandled response \""response-msg"\"")
-;;; #f))
-;;; ;; depending on what action (i.e. ctype) is we will block here waiting for
-;;; ;; all the data (mechanism to be determined)
-;;; ;;
-;;; ;; if res is a "working on it" then wait
-;;; ;; wait for result
-;;; ;; mailbox thread wait on
-;;;
-;;; ;; if res is a "can't help you" then try a different server
-;;; ;; if res is a "ack" (e.g. for one-shot requests) then return res
-;;; ))
-;;; (else
-;;; (if (< count 10)
-;;; (let* ((url (conc (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv))))
-;;; (thread-sleep! 1)
-;;; (print "ERROR: Bad result from " url ", dbname: " dbname ", action: " action ", params: " params ". Trying again in 1 second.")
-;;; (call acfg dbname action params (+ count 1)))
-;;; (begin
-;;; (error (conc "ERROR: " count " tries, still have improper response res-full=" res-full)))))))
-;;; (begin
-;;; (if (not rdat)
-;;; (print "ERROR: action " action " not registered.")
-;;; (if (< count 10)
-;;; (begin
-;;; (thread-sleep! 1)
-;;; (area-hosts-set! acfg (make-hash-table)) ;; clear out all known hosts
-;;; (print "ERROR: no server found, srv=" srv ", trying again in 1 seconds")
-;;; (call acfg dbname action params (+ count 1)))
-;;; (begin
-;;; (error (conc "ERROR: no server found after 10 tries, srv=" srv ", giving up."))
-;;; #;(error "No server available"))))))))
-;;;
-;;;
-;;; ;;======================================================================
-;;; ;; U T I L I T I E S
-;;; ;;======================================================================
-;;;
-;;; ;; get a signature for identifing this process
-;;; ;;
-;;; (define (get-process-signature)
-;;; (cons (get-host-name)(current-process-id)))
-;;;
-;;; ;;======================================================================
-;;; ;; S Y S T E M S T U F F
-;;; ;;======================================================================
-;;;
-;;; ;; get normalized cpu load by reading from /proc/loadavg and
-;;; ;; /proc/cpuinfo return all three values and the number of real cpus
-;;; ;; and the number of threads returns alist '((adj-cpu-load
-;;; ;; . normalized-proc-load) ... etc. keys: adj-proc-load,
-;;; ;; adj-core-load, 1m-load, 5m-load, 15m-load
-;;; ;;
-;;; (define (get-normalized-cpu-load)
-;;; (let ((res (get-normalized-cpu-load-raw))
-;;; (default `((adj-proc-load . 2) ;; there is no right answer
-;;; (adj-core-load . 2)
-;;; (1m-load . 2)
-;;; (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong
-;;; (15m-load . 0)
-;;; (proc . 1)
-;;; (core . 1)
-;;; (phys . 1)
-;;; (error . #t))))
-;;; (cond
-;;; ((and (list? res)
-;;; (> (length res) 2))
-;;; res)
-;;; ((eq? res #f) default) ;; add messages?
-;;; ((eq? res #f) default) ;; this would be the #eof
-;;; (else default))))
-;;;
-;;; (define (get-normalized-cpu-load-raw)
-;;; (let* ((actual-host (get-host-name))) ;; #f is localhost
-;;; (let ((data (append
-;;; (with-input-from-file "/proc/loadavg" read-lines)
-;;; (with-input-from-file "/proc/cpuinfo" read-lines)
-;;; (list "end")))
-;;; (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
-;;; (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
-;;; (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
-;;; (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
-;;; (max-num (lambda (p n)(max (string->number p) n))))
-;;; ;; (print "data=" data)
-;;; (if (null? data) ;; something went wrong
-;;; #f
-;;; (let loop ((hed (car data))
-;;; (tal (cdr data))
-;;; (loads #f)
-;;; (proc-num 0) ;; processor includes threads
-;;; (phys-num 0) ;; physical chip on motherboard
-;;; (core-num 0)) ;; core
-;;; ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
-;;; (if (null? tal) ;; have all our data, calculate normalized load and return result
-;;; (let* ((act-proc (+ proc-num 1))
-;;; (act-phys (+ phys-num 1))
-;;; (act-core (+ core-num 1))
-;;; (adj-proc-load (/ (car loads) act-proc))
-;;; (adj-core-load (/ (car loads) act-core))
-;;; (result
-;;; (append (list (cons 'adj-proc-load adj-proc-load)
-;;; (cons 'adj-core-load adj-core-load))
-;;; (list (cons '1m-load (car loads))
-;;; (cons '5m-load (cadr loads))
-;;; (cons '15m-load (caddr loads)))
-;;; (list (cons 'proc act-proc)
-;;; (cons 'core act-core)
-;;; (cons 'phys act-phys)))))
-;;; result)
-;;; (regex-case
-;;; hed
-;;; (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
-;;; (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num))
-;;; (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num))
-;;; (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num)))
-;;; (else
-;;; (begin
-;;; ;; (print "NO MATCH: " hed)
-;;; (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))
-;;;
-;;; (define (get-host-stats acfg)
-;;; (let ((stats-hash (area-stats acfg)))
-;;; ;; use this opportunity to remove references to dbfiles which have not been accessed in a while
-;;; (for-each
-;;; (lambda (dbname)
-;;; (let* ((stats (hash-table-ref stats-hash dbname))
-;;; (last-access (stat-when stats)))
-;;; (if (and (> last-access 0) ;; if zero then there has been no access
-;;; (> (- (current-seconds) last-access) 10)) ;; not used in ten seconds
-;;; (begin
-;;; (print "Removing " dbname " from stats list")
-;;; (hash-table-delete! stats-hash dbname) ;; remove from stats hash
-;;; (stat-dbs-set! stats (hash-table-keys stats))))))
-;;; (hash-table-keys stats-hash))
-;;;
-;;; `(,(hash-table->alist (area-dbs acfg)) ;; dbname => randnum
-;;; ,(map (lambda (dbname) ;; dbname is the db name
-;;; (cons dbname (stat-when (hash-table-ref stats-hash dbname))))
-;;; (hash-table-keys stats-hash))
-;;; (cpuload . ,(get-normalized-cpu-load)))))
-;;; #;(stats . ,(map (lambda (k) ;; create an alist from the stats data
-;;; (cons k (stat->alist (hash-table-ref (area-stats acfg) k))))
-;;; (hash-table-keys (area-stats acfg))))
-;;;
-;;; #;(trace
-;;; ;; assv
-;;; ;; cdr
-;;; ;; caar
-;;; ;; ;; cdr
-;;; ;; call
-;;; ;; finalize-all-db-handles
-;;; ;; get-all-server-pkts
-;;; ;; get-normalized-cpu-load
-;;; ;; get-normalized-cpu-load-raw
-;;; ;; launch
-;;; ;; nmsg-send
-;;; ;; process-db-queries
-;;; ;; receive-message
-;;; ;; std-peer-handler
-;;; ;; update-known-servers
-;;; ;; work-queue-processor
-;;; )
-;;;
-;;; ;;======================================================================
-;;; ;; netutil
-;;; ;; move this back to ulex-netutil.scm someday?
-;;; ;;======================================================================
-;;;
-;;; ;; #include
-;;; ;; #include
-;;; ;; #include
-;;; ;; #include
-;;;
-;;; (foreign-declare "#include \"sys/types.h\"")
-;;; (foreign-declare "#include \"sys/socket.h\"")
-;;; (foreign-declare "#include \"ifaddrs.h\"")
-;;; (foreign-declare "#include \"arpa/inet.h\"")
-;;;
-;;; ;; get IP addresses from ALL interfaces
-;;; (define get-all-ips
-;;; (foreign-safe-lambda* scheme-object ()
-;;; "
-;;;
-;;; // from https://stackoverflow.com/questions/17909401/linux-c-get-default-interfaces-ip-address :
-;;;
-;;;
-;;; C_word lst = C_SCHEME_END_OF_LIST, len, str, *a;
-;;; // struct ifaddrs *ifa, *i;
-;;; // struct sockaddr *sa;
-;;;
-;;; struct ifaddrs * ifAddrStruct = NULL;
-;;; struct ifaddrs * ifa = NULL;
-;;; void * tmpAddrPtr = NULL;
-;;;
-;;; if ( getifaddrs(&ifAddrStruct) != 0)
-;;; C_return(C_SCHEME_FALSE);
-;;;
-;;; // for (i = ifa; i != NULL; i = i->ifa_next) {
-;;; for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) {
-;;; if (ifa->ifa_addr->sa_family==AF_INET) { // Check it is
-;;; // a valid IPv4 address
-;;; tmpAddrPtr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
-;;; char addressBuffer[INET_ADDRSTRLEN];
-;;; inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN);
-;;; // printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer);
-;;; len = strlen(addressBuffer);
-;;; a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len));
-;;; str = C_string(&a, len, addressBuffer);
-;;; lst = C_a_pair(&a, str, lst);
-;;; }
-;;;
-;;; // else if (ifa->ifa_addr->sa_family==AF_INET6) { // Check it is
-;;; // // a valid IPv6 address
-;;; // tmpAddrPtr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
-;;; // char addressBuffer[INET6_ADDRSTRLEN];
-;;; // inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN);
-;;; //// printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer);
-;;; // len = strlen(addressBuffer);
-;;; // a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len));
-;;; // str = C_string(&a, len, addressBuffer);
-;;; // lst = C_a_pair(&a, str, lst);
-;;; // }
-;;;
-;;; // else {
-;;; // printf(\" not an IPv4 address\\n\");
-;;; // }
-;;;
-;;; }
-;;;
-;;; freeifaddrs(ifa);
-;;; C_return(lst);
-;;;
-;;; "))
-;;;
-;;; ;; Change this to bias for addresses with a reasonable broadcast value?
-;;; ;;
-;;; (define (ip-pref-less? a b)
-;;; (let* ((rate (lambda (ipstr)
-;;; (regex-case ipstr
-;;; ( "^127\\." _ 0 )
-;;; ( "^(10\\.0|192\\.168\\.)\\..*" _ 1 )
-;;; ( else 2 ) ))))
-;;; (< (rate a) (rate b))))
-;;;
-;;;
-;;; (define (get-my-best-address)
-;;; (let ((all-my-addresses (get-all-ips))
-;;; ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))
-;;; )
-;;; (cond
-;;; ((null? all-my-addresses)
-;;; (get-host-name)) ;; no interfaces?
-;;; ((eq? (length all-my-addresses) 1)
-;;; (car all-my-addresses)) ;; only one to choose from, just go with it
-;;;
-;;; (else
-;;; (car (sort all-my-addresses ip-pref-less?)))
-;;; ;; (else
-;;; ;; (ip->string (car (filter (lambda (x) ;; take any but 127.
-;;; ;; (not (eq? (u8vector-ref x 0) 127)))
-;;; ;; all-my-addresses))))
-;;;
-;;; )))
-;;;
-;;; (define (get-all-ips-sorted)
-;;; (sort (get-all-ips) ip-pref-less?))
-;;;
-;;;
-
+ (map address-info-host
+ (filter (lambda (x)
+ (equal? (address-info-type x) "tcp"))
+ (address-infos (get-host-name)))))
+
+)