Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -22,37 +22,39 @@ CSCOPTS= INSTALL=install SRCFILES = # all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard -all : $(PREFIX)/bin/.$(ARCHSTR) mtest +all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtserve # add dboard mtut and tcmt back later + +# Configuration stuff +transport-flavor : + @echo Creating transport-flavor with full as flavor. Options include: full, simple + echo full > transport-flavor + +ulex.scm dbmgrmod.scm : ulex.scm.template dbmgrmod.scm.template transport-flavor ulex-*/*scm + ./configure # module source files MSRCFILES = autoload.scm dbi.scm ducttape-lib.scm pkts.scm stml2.scm \ - cookie.scm mutils.scm mtargs.scm apimod.scm \ + cookie.scm mutils.scm mtargs.scm apimod.scm ulex.scm \ configfmod.scm commonmod.scm dbmod.scm rmtmod.scm \ debugprint.scm mtver.scm csv-xml.scm servermod.scm \ - hostinfo.scm adjutant.scm processmod.scm testsmod.scm \ + adjutant.scm processmod.scm testsmod.scm \ itemsmod.scm keysmod.scm mtmod.scm rmtmod.scm \ tasksmod.scm pgdb.scm launchmod.scm runsmod.scm \ portloggermod.scm archivemod.scm ezstepsmod.scm \ - subrunmod.scm bigmod.scm testsmod.scm - -# GUISRCF = + subrunmod.scm bigmod.scm testsmod.scm dbmgrmod.scm GUIMODFILES = tree.scm dashboard-tests.scm vgmod.scm \ dashboard-context-menu.scm dcommon.scm -# dashboard-guimonitor.scm - mofiles/dashboard-context-menu.o : mofiles/dcommon.o mofiles/dashboard-tests.o : mofiles/dcommon.o -# mofiles/dcommon.o mofiles/tree.o : mofiles/gutils.o OFILES = $(SRCFILES:%.scm=%.o) -# GOFILES = $(GUISRCF:%.scm=%.o) MOFILES = $(addprefix mofiles/,$(MSRCFILES:%.scm=%.o)) GMOFILES = $(addprefix mofiles/,$(GUIMODFILES:%.scm=%.o)) # compiled import files @@ -77,11 +79,10 @@ mofiles/bigmod.o : mofiles/rmtmod.o # mofiles/clientmod.o : mofiles/servermod.oibpq-dev mofiles/commonmod.o : megatest-fossil-hash.scm mofiles/commonmod.o : mofiles/configfmod.o mofiles/commonmod.o : mofiles/debugprint.o -mofiles/commonmod.o : mofiles/hostinfo.o mofiles/commonmod.o : mofiles/keysmod.o mofiles/commonmod.o : mofiles/mtargs.o mofiles/commonmod.o : mofiles/mtver.o mofiles/commonmod.o : mofiles/processmod.o mofiles/configfmod.o : mofiles/keysmod.o @@ -99,19 +100,24 @@ mofiles/launchmod.o : mofiles/rmtmod.o mofiles/servermod.o mofiles/mtmod.o : mofiles/debugprint.o mofiles/portloggermod.o : mofiles/tasksmod.o mofiles/rmtmod.o : mofiles/apimod.o mofiles/rmtmod.o : mofiles/commonmod.o mofiles/portloggermod.o -mofiles/rmtmod.o : mofiles/itemsmod.o # mofiles/clientmod.o +mofiles/rmtmod.o : mofiles/itemsmod.o mofiles/ulex.o +mofiles/rmtmod.o : mofiles/dbmgrmod.o mofiles/runsmod.o : mofiles/rmtmod.o mofiles/archivemod.o mofiles/servermod.o : mofiles/commonmod.o mofiles/stml2.o : mofiles/cookie.o mofiles/dbi.o mofiles/tasksmod.o : mofiles/pgdb.o mofiles/dbmod.o mofiles/testsmod.o : mofiles/commonmod.o mofiles/testsmod.o : mofiles/itemsmod.o mofiles/rmtmod.o mofiles/tasksmod.o + +# split modules. Note: we can switch between ulex and ulex simple. +mofiles/ulex.o : ulex/ulex.scm ulex-simple/ulex.scm dashboard.o megatest.o : db_records.scm megatest-fossil-hash.scm + ADTLSCR=mt_laststep mt_runstep mt_ezstep HELPERS=$(addprefix $(PREFIX)/bin/,$(ADTLSCR)) DEPLOYHELPERS=$(addprefix deploytarg/,$(ADTLSCR)) MTESTHASH=$(shell fossil info|grep checkout:| awk '{print $$2}') @@ -130,10 +136,13 @@ PNGFILES = $(shell cd docs/manual;ls *png) mtest: megatest.scm $(MOFILES) megatest-fossil-hash.scm csc $(CSCOPTS) $(MOFILES) megatest.scm -o mtest + +mtserve: mtserve.scm $(MOFILES) megatest-fossil-hash.scm + csc $(CSCOPTS) $(MOFILES) mtserve.scm -o mtserve # $(MOIMPFILES) removed showmtesthash: @echo $(MTESTHASH) @@ -209,10 +218,16 @@ $(PREFIX)/bin/.$(ARCHSTR)/mtest : mtest utils/mk_wrapper @echo Installing to PREFIX=$(PREFIX) $(INSTALL) mtest $(PREFIX)/bin/.$(ARCHSTR)/mtest utils/mk_wrapper $(PREFIX) mtest $(PREFIX)/bin/megatest chmod a+x $(PREFIX)/bin/megatest + +$(PREFIX)/bin/.$(ARCHSTR)/mtserve : mtserve utils/mk_wrapper + @echo Installing to PREFIX=$(PREFIX) + $(INSTALL) mtserve $(PREFIX)/bin/.$(ARCHSTR)/mtserve + utils/mk_wrapper $(PREFIX) mtserve $(PREFIX)/bin/mtserver + chmod a+x $(PREFIX)/bin/mtserver $(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard $(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/newdashboard : $(PREFIX)/bin/.$(ARCHSTR)/ndboard utils/mk_wrapper @@ -340,11 +355,15 @@ # $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \ # $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \ # $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 # $(PREFIX)/bin/tcmt -install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ +$(PREFIX)/bin/megatest : $(PREFIX)/bin/.$(ARCHSTR)/mtest +$(PREFIX)/bin/mtserver : $(PREFIX)/bin/.$(ARCHSTR)/mtserve + +install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest \ + $(PREFIX)/bin/megatest $(PREFIX)/bin/mtserver \ $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/serialize-env \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/db/mt-pg.sql \ Index: TODO ================================================================== --- TODO +++ TODO @@ -18,10 +18,13 @@ TODO ==== Loose ends ---------- + +15:09:29 error in calling find-and-mark-incomplete for run-id 5, exn=# + might be related to initial conditions in the db. (no run entry in runs table?). . -list-servers not correct . move *remotedat* into bigdata . add back server stats on exit (look in rmt:run in rmtmod.scm) Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -29,11 +29,11 @@ ( api:run-server-process api:start-server api:dispatch-cmd api:execute-requests -api:process-request +;; api:process-request ) (import scheme chicken.base chicken.process-context.posix @@ -175,12 +175,13 @@ (seconds->year-work-week/day-time-fname (current-seconds)) "-"cleandbname".log")) (logf2 (conc logd "/server-" (seconds->year-work-week/day-time-fname (current-seconds)) "-"cleandbname"-")) - (cmd (conc "nbfake megatest -server - -area "apath - " -db "dbname" -autolog "logf2))) + (cmd (conc "nbfake mtserver -server - -area "apath" -db "dbname) + ;; " -autolog "logf2 ;; the side log did not help. Ended up with two logs and the pid in the name was not that useful. + )) (if (not (directory-exists? logd)) (create-directory logd #t)) (system (conc "NBFAKE_LOG="logf" "cmd)))) ;; special function to get server @@ -374,11 +375,11 @@ ((general-call) (let ((stmtname (car params)) (run-id (cadr params)) (realparams (cddr params))) (db:general-call dbstruct stmtname run-id realparams))) ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) + ((ping) `(#t ,(current-process-id) ,(cadr params))) ;; (current-process-id)) ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) @@ -420,11 +421,11 @@ ;; api:process-request ;; db:* ;; ;; NB// Runs on the server as part of the server loop ;; -(define (api:process-request dbstruct indat) ;; the $ is the request vars proc +#;(define (api:process-request dbstruct indat) ;; the $ is the request vars proc (let* ((cmd-in (alist-ref 'cmd indat)) ;; ($ 'cmd)) (cmd (if (string? cmd-in)(string->symbol cmd-in) cmd-in)) (params (alist-ref 'params indat)) (key (alist-ref 'key indat)) ;; TODO - add this back ;; (doprint (apply common:low-noise-print 10 params)) ADDED attic/configure Index: attic/configure ================================================================== --- /dev/null +++ attic/configure @@ -0,0 +1,101 @@ +#!/bin/bash + +# Copyright 2006-2017, Matthew Welland. +# +# This file is part of Megatest. +# +# Megatest is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Megatest is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Megatest. If not, see . + +# Configure the build + +if [[ "$1"x == "x" ]];then + PREFIX=$PWD +else + PREFIX=$1 +fi + + +#====================================================================== +# Configure stuff needed for eggs +#====================================================================== + +function configure_dependencies () { + + #====================================================================== + # libnanomsg + #====================================================================== + + if [[ ! $(ls /usr/lib/*/libnanomsg*) ]];then + echo "libnanomsg build needed." + echo "BUILD_NANOMSG=yes" >> makefile.inc + fi + + #====================================================================== + # postgresql libraries + #====================================================================== + + if [[ ! $(ls /usr/lib/*/libpq.*) ]];then + echo "Postgresql build needed." + echo "BUILD_POSTGRES=yes" >> makefile.inc + fi + + if [[ ! $(ls /usr/lib/*/libsqlite3.*) ]];then + echo "Sqlite3 build needed." + echo "BUILD_SQLITE3=yes" >> makefile.inc + fi + +} + +#====================================================================== +# Initialize makefile.inc +#====================================================================== + +echo "" > makefile.inc + +#====================================================================== +# Do we need Chicken? +#====================================================================== + +if [[ -e /usr/bin/sw_vers ]]; then + ARCHSTR=$(/usr/bin/sw_vers -productVersion) +else + ARCHSTR=$(lsb_release -sr) +fi + +echo "CHICKEN_PREFIX=$PREFIX/.$ARCHSTR" >> makefile.inc +CHICKEN_PREFIX=$PREFIX/bin/.$ARCHSTR + +if [[ ! $(type csi) ]];then + echo "Chicken build needed." + echo "BUILD_CHICKEN=yes" >> makefile.inc + configure_dependencies + echo "include chicken.makefile" >> makefile.inc +else + echo "CSIPATH=$(which csi)" >> makefile.inc + CSIPATH=$(which csi) + echo "CKPATH=$(dirname $(dirname $CSIPATH))" >> makefile.inc +fi + +# Make setup scripts +echo "#!/bin/bash" > setup.sh +echo "export PATH=$CHICKEN_PREFIX/bin:\$PATH" >> setup.sh +echo "export LD_LIBRARY_PATH=$CHICKEN_PREFIX/lib" >> setup.sh +echo 'exec "$@"' >> setup.sh +chmod a+x setup.sh + +echo "setenv PATH $CHICKEN_PREFIX/bin:\$PATH" > setup.csh +echo "setenv LD_LIBRARY_PATH $CHICKEN_PREFIX/lib" >> setup.csh + +echo "All done creating makefile.inc, feel free to edit it!" +echo "run \"setup.sh bash\" or source setup.csh to get PATH and LD_LIBRARY_PATH adjusted" Index: build-assist/README ================================================================== --- build-assist/README +++ build-assist/README @@ -1,28 +1,12 @@ -Here is how I like to install chicken for building Megatest. - -This guide assumes you have the Megatest fossil and are in the build-assist directory and -that you have the opensrc fossil with uv synced: - -fossil clone https://www.kiatoa.com/fossils/megatest -fossil clone https://www.kiatoa.com/fossils/opensrc;cd opensrc;fossil uv sync - -Make a build directory and go to it: - -mkdir build;cd build - -Make a destination directory and set PREFIX - -export PREFIX=/opt/chicken/5.3.0; mkdir -p $PREFIX - -Get chicken: - -wget https://code.call-cc.org/releases/5.3.0/chicken-5.3.0.tar.gz - -Extract, build, and install chicken: - -tar xf chicken-5.3.0.tar.gz; cd chicken-5.3.0; make PLATFORM=linux PREFIX=$PREFIX install; cd .. - -Install all needed eggs. -for egg in $(cat ../ck5-egg.list);do echo $egg;ck5 chicken-install $egg;done - -Now run the script ../iup-compile.sh for remaining instructions +README for IUP + + IUP is a portable toolkit for building graphical user interfaces. It offers a configuration API in three basic languages: C, Lua and LED. IUP's purpose is to allow a program to be executed in different systems without any modification, therefore it is highly portable. Its main advantages are: + * high performance, due to the fact that it uses native interface elements. + * fast learning by the user, due to the simplicity of its API. + + Build instructions and usage are available in the IUP documentation. + + For complete information, visit IUP's web site at http://www.tecgraf.puc-rio.br/iup + or access its documentation in the HTML folder. + +(end of README) Index: build-assist/ck5-eggs.list ================================================================== --- build-assist/ck5-eggs.list +++ build-assist/ck5-eggs.list @@ -1,48 +0,0 @@ -csm -address-info -ansi-escape-sequences -apropos -base64 -crypt -csv-abnf -directory-utils -dot-locking -filepath -fmt -format -http-client -itemsmod -json -linenoise -mailbox -md5 -message-digest -nanomsg -postgresql -queues -regex -regex-case -rfc3339 -s11n -sha1 -simple-exceptions -slice -sparse-vectors -spiffy -spiffy-directory-listing -spiffy-request-vars -sql-de-lite -sqlite3 -sql-null -srfi-1 -srfi-13 -srfi-19 -sxml-modifications -sxml-serializer -sxml-transforms -system-information -tcp6 -test -typed-records -uri-common -z3 ADDED build-assist/ck5-full-eggs.list Index: build-assist/ck5-full-eggs.list ================================================================== --- /dev/null +++ build-assist/ck5-full-eggs.list @@ -0,0 +1,49 @@ +csm +address-info +ansi-escape-sequences +apropos +base64 +breadline +crypt +csv-abnf +directory-utils +dot-locking +filepath +fmt +format +http-client +itemsmod +json +linenoise +mailbox +md5 +message-digest +nanomsg +postgresql +queues +regex +regex-case +rfc3339 +s11n +sha1 +simple-exceptions +slice +sparse-vectors +spiffy +spiffy-directory-listing +spiffy-request-vars +sql-de-lite +sqlite3 +sql-null +srfi-1 +srfi-13 +srfi-19 +sxml-modifications +sxml-serializer +sxml-transforms +system-information +tcp6 +test +typed-records +uri-common +z3 Index: build-assist/iup-compile.sh ================================================================== --- build-assist/iup-compile.sh +++ build-assist/iup-compile.sh @@ -15,12 +15,11 @@ echo " cp *.a *.so $PREFIX/lib" echo " cp include/*.h $PREFIX/include" echo " 4. run the chicken-install like this:" echo "If you use a wrapper (e.g. ck5) to create the chicken environment:" -echo "CSC_OPTIONS=\"-I$PREFIX/include -I$PREFIX/include/im -I$PREFIX/include/cd -I$PREFIX/include/iup -L$PREFIX/lib -C -std=gnu99\" ck5 chicken-install iup -feature disable-iup-matrixex" +echo "CSC_OPTIONS=\"-I$PREFIX/include -I$PREFIX/include/im -I$PREFIX/include/cd -I$PREFIX/include/iup -L$PREFIX/lib -C -std=gnu99\" ck5 chicken-install iup canvas-draw -feature disable-iup-matrixex" echo "else:" -echo "CSC_OPTIONS=\"-I$PREFIX/include -I$PREFIX/include/im -I$PREFIX/include/cd -I$PREFIX/include/iup -L$PREFIX/lib -C -std=gnu99\" chicken-install iup" -echo "Then repeat for canvas-draw" +echo "CSC_OPTIONS=\"-I$PREFIX/include -I$PREFIX/include/im -I$PREFIX/include/cd -I$PREFIX/include/iup -L$PREFIX/lib -C -std=gnu99\" chicken-install iup canvas-draw" # (export PREFIX=/home/matt/data/buildall/ck5.2;CSC_OPTIONS="-I/home/matt/data/buildall/ck5.2/include -I/home/matt/data/buildall/ck5.2/include/im -I/home/matt/data/buildall/ck5.2/include/cd -I/home/matt/data/buildall/ck5.2/include/iup -L/home/matt/data/buildall/ck5.2/lib -C -std=gnu99" ck5 chicken-install iup -feature disable-iup-matrixex) Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -24,11 +24,11 @@ (declare (uses stml2)) (declare (uses pkts)) (declare (uses processmod)) (declare (uses mtargs)) (declare (uses configfmod)) -(declare (uses hostinfo)) +;; (declare (uses hostinfo)) (declare (uses keysmod)) ;; odd but it works? ;; (declare (uses itemsmod)) @@ -172,10 +172,11 @@ runs:runrec-serverdat runs:runrec-transport runs:runrec-db runs:runrec-top-path runs:runrec-run_id +test:testdat? test:get-id test:get-run_id test:get-test-name test:get-state test:get-status @@ -184,10 +185,11 @@ make-and-init-bigdata call-with-environment-variables common:simple-file-lock common:simple-file-lock-and-wait common:simple-file-release-lock +common:with-simple-file-lock common:fail-safe get-file-descriptor-count common:get-this-exe-fullpath common:get-sync-lock-filepath common:find-local-megatest @@ -386,15 +388,15 @@ *db-access-mutex* *db-transaction-mutex* *db-cache-path* *db-with-db-mutex* *db-api-call-time* +*didsomething* *no-sync-db* *my-signature* *transport-type* *logged-in-clients* -*server-info* *server-run* *run-id* *server-kind-run* *home-host* *heartbeat-mutex* @@ -507,10 +509,14 @@ runs:gendat-run-info-set! runs:gendat-runname-set! runs:gendat-target-set! megatest-fossil-hash + +rmt:mk-signature +rmt:get-signature + ) (import scheme chicken.base @@ -531,10 +537,12 @@ chicken.sort chicken.time.posix (prefix base64 base64:) (prefix sqlite3 sqlite3:) + + address-info directory-utils matchable md5 message-digest regex @@ -556,11 +564,11 @@ processmod (prefix mtargs args:) configfmod keysmod ;; itemsmod - hostinfo + ;; hostinfo ) ;;====================================================================== ;; CONTENTS ;; @@ -842,10 +850,14 @@ (define (test:get-test-name vec)(vector-ref vec 2)) (define (test:get-state vec) (vector-ref vec 3)) (define (test:get-status vec) (vector-ref vec 4)) (define (test:get-item-path vec)(vector-ref vec 5)) +(define (test:testdat? testdat) + (and (vector? testdat) + (>= (vector-length testdat) 6))) + (define (test:test-get-fullname test) (conc (db:test-get-testname test) (if (equal? (db:test-get-item-path test) "") "" (conc "(" (db:test-get-item-path test) ")")))) @@ -925,10 +937,11 @@ ;; config files. Allows communicating between confgs ;; (define *user-hash-data* (make-hash-table)) (define *db-keys* #f) +(define *didsomething* #f) (define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here (define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config (define *runconfigdat* #f) ;; run configs data (define *configdat* #f) ;; megatest.config data ==> moved to configfmod @@ -974,12 +987,10 @@ ;; replaced by *rmt:remote* ;; (define *runremote* #f) ;; if set up for server communication this will hold ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) ;; (define *server-id* #f) -(define *server-info* #f) ;; good candidate for easily convert to non-global -;; (define *time-to-exit* #f) (define *server-run* #t) (define *run-id* #f) (define *server-kind-run* (make-hash-table)) (define *home-host* #f) ;; (define *total-non-write-delay* 0) @@ -1144,10 +1155,27 @@ ;;====================================================================== ;; end globals ;;====================================================================== + +;; Generate a unique signature for this process, used at both client and +;; server side +(define (rmt:mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (current-process-id) + (argv))))))) + +(define (rmt:get-signature) + (if *my-signature* *my-signature* + (let ((sig (rmt:mk-signature))) + (set! *my-signature* sig) + *my-signature*))) + ;; 0 1 2 3 (defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0)) ;; copied from egg call-with-environment-variables @@ -1220,10 +1248,17 @@ (define (common:simple-file-release-lock fname) (handle-exceptions exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) + +(define (common:with-simple-file-lock fname proc) + (let* ((lkfname (conc fname ".lock"))) + (common:simple-file-lock-and-wait lkfname) + (let ((res (proc))) + (common:simple-file-release-lock lkfname) + res))) ;;====================================================================== ;; PUlled below from common.scm ;;====================================================================== @@ -2672,18 +2707,22 @@ (define (common:unix-ping hostname) (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) (eq? res 0))) (define (launch:is-test-alive host pid) - (if (and host pid (not (equal? host "n/a"))) - (let* ((cmd (conc "ssh " host " pstree -A " pid)) - (output (with-input-from-pipe cmd read-lines))) - (debug:print 2 *default-log-port* "Running " cmd " received " output) - (if (eq? (length output) 0) - #f - #t)) - #t)) + (let* ((same-host (equal? host (get-host-name))) + (cmd (conc + (if same-host "" (conc "ssh "host" ")) + "pstree -A "pid))) + (if (and host pid + (not (equal? host "n/a"))) + (let* ((output (with-input-from-pipe cmd read-lines))) + (debug:print 2 *default-log-port* "Running " cmd " received " output) + (if (eq? (length output) 0) + #f + #t)) + #t))) ;; assuming bad query is about a live test is likely not the right thing to do? (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; hosts had better not be changing the number of cpus too often! (or (hash-table-ref/default *numcpus-cache* actual-host #f) @@ -4343,11 +4382,15 @@ ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d) ;; (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) #f))) (append paths (list (conc *toppath* "/tests")))))) -(define (server:get-best-guess-address hostname) +;;====================================================================== +;; network utilities +;;====================================================================== + +#;(define (server:get-best-guess-address hostname) (let ((res #f)) (for-each (lambda (adr) (if (not (eq? (u8vector-ref adr 0) 127)) (set! res adr))) @@ -4355,10 +4398,42 @@ (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) (string-intersperse (map number->string (u8vector->list (if res res (hostname->ip hostname)))) "."))) + +;; NOTE: Look at address-info egg as alternative to some of this + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + +(define (server:get-best-guess-address hostname) + (let ((all-addresses (get-all-ips hostname))) + (cond + ((null? all-addresses) + hostname #;(get-host-name)) ;; no interfaces? + ((eq? (length all-addresses) 1) + (car all-addresses)) ;; only one to choose from, just go with it + (else + (car (sort all-addresses ip-pref-less?)))))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips hostname) + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) 'tcp)) + (address-infos hostname)))) (define (tests:readlines filename) (call-with-input-file filename (lambda (p) (let loop ((line (read-line p)) Index: configfmod.scm ================================================================== --- configfmod.scm +++ configfmod.scm @@ -18,10 +18,12 @@ ;;====================================================================== (declare (unit configfmod)) (declare (uses mtargs)) +(declare (uses commonmod)) +(declare (uses rmtmod)) (declare (uses debugprint)) (declare (uses keysmod)) (module configfmod ( @@ -49,10 +51,11 @@ configf:write-alist configf:write-config find-config getenv mytarget + my-with-lock nice-path process:cmd-run->list runconfig:read runconfigs-get safe-setenv @@ -89,10 +92,11 @@ (srfi 18) directory-utils dot-locking format matchable + mtargs md5 message-digest regex regex-case sparse-vectors @@ -114,14 +118,19 @@ ;;====================================================================== ;; while targets are Megatest specific they are a useful concept (define mytarget (make-parameter #f)) +;; fake locker +(define (fake-locker fname proc)(proc)) + ;; locking is optional, many environments don't care (e.g. running on one machine) ;; NOTE: the locker must follow the same syntax as with-dot-lock* +;; with-dot-lock* has problems if /tmp and the file being +;; locked are not on the same filesystem ;; -(define my-with-lock (make-parameter with-dot-lock*)) +(define my-with-lock (make-parameter fake-locker)) ;; with-dot-lock*)) ;;====================================================================== ;; move debug stuff to separate module then put these back where they belong ;;====================================================================== ;;====================================================================== @@ -388,12 +397,12 @@ (open-input-file path) path)) ;; we can be handed a port (res (let ((ht-in (if (not ht) (make-hash-table) ht))) - (if (not (configf:lookup ht-in "" "toppath")) - (configf:set-section-var ht-in "" "toppath" path)) + (if (not (configf:lookup ht-in "toppath" "toppath")) + (configf:set-section-var ht-in "toppath" "toppath" (pathname-directory path))) ht-in)) (metapath (if (or (debug:debug-mode 9) keep-filenames) path #f)) (process-wildcards (lambda (res curr-section-name) @@ -405,11 +414,10 @@ (hash-table-delete! res curr-section-name)))))) ;; NOTE: if the section is a wild card it will be REMOVED from res (let loop ((inl (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings env-to-use)) ;; (read-line inp)) (curr-section-name (if curr-section curr-section "default")) (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere (lead #f)) - (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n inl: \"" inl "\"") (if (eof-object? inl) (begin ;; process last section for wildcards (process-wildcards res curr-section-name) (if (string? path) ;; we received a path, not a port, thus we are responsible for closing it. @@ -418,10 +426,12 @@ (for-each (lambda (section) (if (not (member section sections)) (hash-table-delete! res section))) ;; we are using "" as a dumping ground and must remove it before returning the ht (hash-table-keys res))) + + (debug:print 9 *default-log-port* "END: " path) res ) ;; retval (regex-case inl @@ -1005,18 +1015,19 @@ (hash-table->alist data))) (define (runconfig:read fname target environ-patt) (let ((ht (make-hash-table))) (if target (hash-table-set! ht target '())) - (configf:read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f)))) + (configf:read-config fname ht #t environ-patt: environ-patt sections: (if target (list "toppath" "default" target) #f)))) ;;====================================================================== ;; Config file handling ;;====================================================================== ;; convert to param? -(define configf:std-imports "(import big-chicken configfmod commonmod rmtmod (prefix mtargs args:))") +(define configf:std-imports "(import scheme big-chicken system-information simple-exceptions big-chicken configfmod commonmod rmtmod testsmod srfi-69 chicken.process-context.posix)(import (prefix mtargs args:))(define getenv get-environment-variable)") + (define (configf:process-one matchdat l ht allow-system env-to-use linenum) (let* ((prestr (list-ref matchdat 1)) (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv (cmd (list-ref matchdat 3)) (quotedcmd (conc "\""cmd"\"")) @@ -1027,49 +1038,59 @@ (fullcmd (if (member cmdsym '(scheme scm)) `(eval-needed ,(conc "(lambda (ht)" configf:std-imports + ;; "(set! *toppath* \""(configf:lookup ht "toppath" "toppath")"\")" cmd ")")) (case cmdsym ((system) `(noeval-needed ,(conc (configf:system ht cmd)))) ;; ((shell sh) `(noeval-needed ,(conc (string-translate (shell quotedcmd) "\n" " ")))) ((shell sh) `(noeval-needed ,(conc (string-translate (shell cmd) "\n" " ")))) ((realpath rp)`(noeval-needed ,(conc (common:nice-path quotedcmd)))) ((getenv gv) `(noeval-needed ,(conc (get-environment-variable cmd)))) - ((mtrah) `(noeval-needed ,(configf:lookup ht "" "toppath"))) + ((mtrah) `(noeval-needed ,(configf:lookup ht "toppath" "toppath"))) ((get g) (match (string-split cmd) ((sect var) `(noeval-needed ,(configf:lookup ht sect var))) (else (debug:print-error 0 *default-log-port* "#{get ...} used with only one parameter, \"" cmd "\", two needed.") '(bad-param ,(conc "#{get ...} used with only one parameter, \"" cmd "\", two needed."))))) - ((runconfigs-get rget) `(noeval-needed ,(runconfigs-get ht quotedcmd))) ;; (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ;;((runconfigs-get rget) `(noeval-needed ,(runconfigs-get ht quotedcmd))) ;; (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ((runconfigs-get rget) `(noeval-needed ,(runconfigs-get ht cmd))) ;; (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) (else `(#f ,(conc "cmd: " cmd " not recognised"))))))) - (match + + (match fullcmd (('eval-needed newres) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\", fullcmd="fullcmd", exn=" exn) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (print "exn=" (condition->list exn)) - (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) (if (or allow-system (not (member cmdtype '("system" "shell" "sh")))) - (with-input-from-string newres - (lambda () - (set! result (if env-to-use - ((eval (read) env-to-use) ht) - ((eval (read)) ht) - )))) - (set! result (conc "#{(" cmdtype ") " cmd "}"))))) - (('noeval-needed newres)(set! result newres)) + (begin + ;; (debug:print 0 *default-log-port* "eval: "newres) + (with-input-from-string newres + (lambda () + (set! result + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\", eval-needed, newres="newres", exn="(condition->list exn)) + (debug:print 0 *default-log-port* " message1: " ((condition-property-accessor 'exn 'message) exn)) + (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " newres))) + (if env-to-use + ;; ((eval (read) env-to-use) ht) disable until we fix this. 2/10/22 Martin + ((eval (read)) ht) + ((eval (read)) ht) + )))))) + (set! result (conc "#{(" cmdtype ") " cmd "}")) + ) + ) + (('noeval-needed newres) + (set! result newres)) (else ;; (#f errres) - (debug:print 0 *default-log-port* "WARNING: failed to process config input \""l"\", fullcmd="fullcmd"."))) + (debug:print 0 *default-log-port* "WARNING: failed to process config input \""l"\", fullcmd="fullcmd"."))) + ;; we process as a result (let ((delta (- (current-seconds) start-time))) (debug:print-info (if (> delta 2) 0 9) *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result)) (conc prestr result poststr))) @@ -1081,11 +1102,11 @@ (let ((result (configf:process-one matchdat l ht allow-system env-to-use linenum))) (loop result)) res)) res))) -(define (configf:process-line-old l ht allow-system env-to-use #!key (linenum #f)) +#;(define (configf:process-line-old l ht allow-system env-to-use #!key (linenum #f)) (let loop ((res l)) (if (string? res) (let ((matchdat (string-search configf:var-expand-regex res))) (if matchdat (let* ((prestr (list-ref matchdat 1)) @@ -1095,12 +1116,11 @@ (result #f) (start-time (current-seconds)) (cmdsym (string->symbol cmdtype)) (fullcmd (conc configf:std-imports - "(import chicken.process-context.posix)" - "(define setenv set-environment-variable)" + ;;"(define setenv set-environment-variable)" (case cmdsym ((scheme scm) (conc "(lambda (ht)" cmd ")")) ((system) (conc "(lambda (ht)(configf:system ht \"" cmd "\"))")) ((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))")) ((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))")) @@ -1122,11 +1142,11 @@ ;; (print "fullcmd=" fullcmd) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\", exn=" exn) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* " message2: " ((condition-property-accessor 'exn 'message) exn)) ;; (print "exn=" (condition->list exn)) (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) (if (or allow-system (not (member cmdtype '("system" "shell" "sh")))) (with-input-from-string fullcmd @@ -1190,11 +1210,11 @@ ;;====================================================================== ;; DO THE LOCKING AROUND THE CALL ;;====================================================================== ;; -(define (configf:write-alist cdat fname) +(define (configf:write-alist cdat fname #!optional (check-written #f)) ;; (if (not (common:faux-lock fname)) ;; (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname) ((my-with-lock) fname (lambda () @@ -1202,26 +1222,27 @@ (res (begin (with-output-to-file fname ;; first write out the file (lambda () (pp dat))) - ;; I don't like this. It makes write-alist opaque and complicated. -mrw- - (if (file-exists? fname) ;; now verify it is readable - (if (configf:read-alist fname) - #t ;; data is good. - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) - #f) - (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") - (delete-file fname)) - #f)) - #f)))) + ;; I don't like this. It makes write-alist complicated + ;; move to something like write-and-verify-alist. -mrw- + (if check-written + (if (file-exists? fname) ;; now verify it is readable + (if (configf:read-alist fname) + 'data-good ;; data is good. + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) + 'data-bad) + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + (delete-file fname))) + 'data-not-there) + 'data-not-checked)))) res)))) (define (common:get-fields cfgdat) (let ((fields (hash-table-ref/default cfgdat "fields" '()))) (map car fields))) ) Index: configure ================================================================== --- configure +++ configure @@ -15,87 +15,18 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Megatest. If not, see . -# Configure the build - -if [[ "$1"x == "x" ]];then - PREFIX=$PWD -else - PREFIX=$1 -fi - - -#====================================================================== -# Configure stuff needed for eggs -#====================================================================== - -function configure_dependencies () { - - #====================================================================== - # libnanomsg - #====================================================================== - - if [[ ! $(ls /usr/lib/*/libnanomsg*) ]];then - echo "libnanomsg build needed." - echo "BUILD_NANOMSG=yes" >> makefile.inc - fi - - #====================================================================== - # postgresql libraries - #====================================================================== - - if [[ ! $(ls /usr/lib/*/libpq.*) ]];then - echo "Postgresql build needed." - echo "BUILD_POSTGRES=yes" >> makefile.inc - fi - - if [[ ! $(ls /usr/lib/*/libsqlite3.*) ]];then - echo "Sqlite3 build needed." - echo "BUILD_SQLITE3=yes" >> makefile.inc - fi - -} - -#====================================================================== -# Initialize makefile.inc -#====================================================================== - -echo "" > makefile.inc - -#====================================================================== -# Do we need Chicken? -#====================================================================== - -if [[ -e /usr/bin/sw_vers ]]; then - ARCHSTR=$(/usr/bin/sw_vers -productVersion) -else - ARCHSTR=$(lsb_release -sr) -fi - -echo "CHICKEN_PREFIX=$PREFIX/.$ARCHSTR" >> makefile.inc -CHICKEN_PREFIX=$PREFIX/bin/.$ARCHSTR - -if [[ ! $(type csi) ]];then - echo "Chicken build needed." - echo "BUILD_CHICKEN=yes" >> makefile.inc - configure_dependencies - echo "include chicken.makefile" >> makefile.inc -else - echo "CSIPATH=$(which csi)" >> makefile.inc - CSIPATH=$(which csi) - echo "CKPATH=$(dirname $(dirname $CSIPATH))" >> makefile.inc -fi - -# Make setup scripts -echo "#!/bin/bash" > setup.sh -echo "export PATH=$CHICKEN_PREFIX/bin:\$PATH" >> setup.sh -echo "export LD_LIBRARY_PATH=$CHICKEN_PREFIX/lib" >> setup.sh -echo 'exec "$@"' >> setup.sh -chmod a+x setup.sh - -echo "setenv PATH $CHICKEN_PREFIX/bin:\$PATH" > setup.csh -echo "setenv LD_LIBRARY_PATH $CHICKEN_PREFIX/lib" >> setup.csh - -echo "All done creating makefile.inc, feel free to edit it!" -echo "run \"setup.sh bash\" or source setup.csh to get PATH and LD_LIBRARY_PATH adjusted" +# Flavors include: simple, full and none + +# look at build.config (not a version controlled file and +# create ulex.scm and dbmgr.scm + +if [[ -e transport-flavor ]];then + FLAVOR=$(cat transport-flavor) +else + FLAVOR=simple +fi + +sed -e "s/FLAVOR/$FLAVOR/" ulex.scm.template > ulex.scm +sed -e "s/FLAVOR/$FLAVOR/" dbmgrmod.scm.template > dbmgrmod.scm Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -98,10 +98,11 @@ typed-records sparse-vectors format srfi-4 srfi-14 + srfi-18 ) ;; (include "common_records.scm") ;; (include "db_records.scm") ;; (include "run_records.scm") @@ -198,11 +199,11 @@ (print ". Done. All ok."))) (if (not (null? remargs)) (begin (print "Unrecognised arguments: " (string-intersperse remargs " ")) - (exit))) + )) (if (args:get-arg "-h") (begin (print help) (exit))) @@ -248,33 +249,33 @@ ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) ;;) -;; data common to all tabs goes here +;;;; data common to all tabs goes here +;;;; +;;(defstruct dboard:commondat +;; ((curr-tab-num 0) : number) +;; please-update +;; tabdats +;; update-mutex +;; updaters +;; updating +;; uidat ;; needs to move to tabdat at some time +;; hide-not-hide-tabs +;; ) ;; -(defstruct dboard:commondat - ((curr-tab-num 0) : number) - please-update - tabdats - update-mutex - updaters - updating - uidat ;; needs to move to tabdat at some time - hide-not-hide-tabs - ) - -(define (dboard:commondat-make) - (make-dboard:commondat - curr-tab-num: 0 - tabdats: (make-hash-table) - please-update: #t - update-mutex: (make-mutex) - updaters: (make-hash-table) - updating: #f - hide-not-hide-tabs: #f - )) +;;(define (dboard:commondat-make) +;; (make-dboard:commondat +;; curr-tab-num: 0 +;; tabdats: (make-hash-table) +;; please-update: #t +;; update-mutex: (make-mutex) +;; updaters: (make-hash-table) +;; updating: #f +;; hide-not-hide-tabs: #f +;; )) ;;====================================================================== ;; buttons color using image ;;====================================================================== @@ -880,15 +881,13 @@ (when (> elapsed-time 2) (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.") (let* ((old-val (iup:attribute *tim* "TIME")) (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val))))))) (if (< (string->number new-val) 5000) - ((debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) - (iup:attribute-set! *tim* "TIME" new-val)))) - - - ) + (begin + (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) + (iup:attribute-set! *tim* "TIME" new-val))))) (dboard:tabdat-allruns-set! tabdat new-res) maxtests) (if (> (dboard:rundat-run-data-offset run-struct) 0) (loop run tal new-res newmaxtests) ;; not done getting data for this run (loop (car tal)(cdr tal) new-res newmaxtests))))))) @@ -1051,11 +1050,11 @@ ) ;; create a concise list of test names ;; (for-each (lambda (rundat) - (if rundat + (if (dboard:rundat? rundat) (let* ((testdats (dboard:rundat-tests rundat)) (testnames (map test:test-get-fullname (hash-table-values testdats)))) (dcommon:rundat-copy-tests-to-by-name rundat) ;; for the normalized list of testnames (union of all runs) (if (not (and (dboard:tabdat-hide-empty-runs tabdat) @@ -1237,11 +1236,11 @@ (let ((newval (car values))) (iup:attribute-set! lb "VALUE" newval) newval)))))) (define (dashboard:update-target-selector tabdat #!key (action-proc #f)) - (let* ((runconf-targs (common:get-runconfig-targets *configdat*)) + (let* ((runconf-targs (common:get-runconfig-targets *runconfigdat*)) (key-lbs (dboard:tabdat-key-listboxes tabdat)) (db-target-dat (rmt:get-targets)) (header (vector-ref db-target-dat 0)) (db-targets (vector-ref db-target-dat 1)) (munge-target (lambda (x) ;; create a target vector from a string. Pad with na if needed. @@ -1341,11 +1340,11 @@ ;; used by run-controls ;; (define (dashboard:update-tree-selector tabdat #!key (action-proc #f)) (let* ((tb (dboard:tabdat-runs-tree tabdat)) - (runconf-targs (common:get-runconfig-targets *configdat*)) + (runconf-targs (common:get-runconfig-targets *runconfigdat*)) (db-target-dat (rmt:get-targets)) (runs-tree-ht (dboard:tabdat-runs-tree-ht tabdat)) (header (vector-ref db-target-dat 0)) (db-targets (vector-ref db-target-dat 1)) (munge-target (lambda (x) ;; create a target vector from a string. Pad with na if needed. @@ -3248,14 +3247,14 @@ (reverse (sqlite3:fold-row (lambda (res t var val) (cons (vector t var val) res)) '() db all-dat-qrystr))) - (let ((zeropt (handle-exceptions - exn - #f - (sqlite3:first-row db all-dat-qrystr)))) + (let ((zeropt (condition-case + (sqlite3:first-row db all-dat-qrystr) + (exn (busy)(db:generic-error-printout exn "ERROR: database " dbdef + " is locked. Try copying to another location, remove original and copy back."))))) (if zeropt ;; NOTE: Add zeropt to the beginning of the list as the list was reversed above. (hash-table-set! res-ht fieldname (cons (apply vector tstart (cdr zeropt)) @@ -3623,10 +3622,26 @@ (runloop (car runtal)(cdr runtal) (+ run-num 1) newdoneruns) (escapeloop #t) ;; (dboard:tabdat-layout-update-ok tabdat) ))))))))) ;; new-run-start-row ))) (debug:print 2 *default-log-port* "no tabdat for run-times-tab-updater")))) + +(define (dashboard:calc-key-patterns tabdat) + ;; generate key patterns from the target stored in tabdat + (let* ((dbkeys (dboard:tabdat-dbkeys tabdat))) + (let ((fres (if (dboard:tabdat-target tabdat) + (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%")))) + (map (lambda (k v)(list k v)) dbkeys ptparts)) + (let ((res '())) + (for-each (lambda (key) + (if (not (equal? key "runname")) + (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f))) + (if val (set! res (cons (list key val) res)))))) + dbkeys) + res)))) + fres))) + ;; handy trick for printing a record ;; ;; (pp (dboard:tabdat->alist tabdat)) ;; @@ -3635,28 +3650,20 @@ ;; (define (tabdat-values tabdat) ;; runs update-rundat using the various filters from the gui ;; (define (dashboard:do-update-rundat tabdat) - (dboard:update-rundat - tabdat - (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%") - (dboard:tabdat-numruns tabdat) - (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") - ;; generate key patterns from the target stored in tabdat - (let* ((dbkeys (dboard:tabdat-dbkeys tabdat))) - (let ((fres (if (dboard:tabdat-target tabdat) - (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%")))) - (map (lambda (k v)(list k v)) dbkeys ptparts)) - (let ((res '())) - (for-each (lambda (key) - (if (not (equal? key "runname")) - (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f))) - (if val (set! res (cons (list key val) res)))))) - dbkeys) - res)))) - fres)))) + (let* ((runnamepatt (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%")) + (numruns (dboard:tabdat-numruns tabdat)) + (testnamepatt (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%")) + (keypatts (dashboard:calc-key-patterns tabdat))) + (dboard:update-rundat + tabdat + runnamepatt + numruns + testnamepatt + keypatts))) (define (dashboard:runs-tab-updater commondat tab-num) (debug:catch-and-dump (lambda () (let* ((tabdat (dboard:common-get-tabdat commondat tab-num: tab-num)) @@ -3754,7 +3761,12 @@ ;; ease debugging by loading ~/.dashboardrc (let ((debugcontrolf (get-debugcontrolf))) (if debugcontrolf (load debugcontrolf))) -(main) +(import srfi-18) + +(thread-join! + (thread-start! + (make-thread main "main"))) + ADDED dbmgrmod.scm.template Index: dbmgrmod.scm.template ================================================================== --- /dev/null +++ dbmgrmod.scm.template @@ -0,0 +1,21 @@ +;;====================================================================== +;; Copyright 2022, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(include "ulex-FLAVOR/dbmgr.scm") Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -422,32 +422,22 @@ ;; create and fill the inmemory db ;; assemble into dbr:dbdat struct and return ;; (define (db:open-dbdat apath dbfile dbinit-proc) (let* ((db (db:open-run-db dbfile dbinit-proc)) - ;; (inmem (db:open-inmem-db dbinit-proc)) + (inmem (db:open-inmem-db dbinit-proc)) (dbdat (make-dbr:dbdat - db: #f ;; db - inmem: db ;; inmem + db: db + inmem: inmem ;; run-id: run-id ;; no can do, there are many run-id values that point to single db fname: dbfile))) + (assert (and (sqlite3:database? db)(sqlite3:database? inmem)) + "FATAL: should have both inmem and on-disk db at this time.") ;; now sync the disk file data into the inmemory db - ;; (db:sync-tables (db:sync-all-tables-list) '("last_update" . 0) db inmem) + (db:sync-tables (db:sync-all-tables-list) '("last_update" . 0) db inmem) ;; (sqlite3:finalize! db) ;; open and close every sync dbdat)) -;; (define (db:open-dbdat apath dbfile dbinit-proc) -;; (let* ((db (db:open-run-db dbfile dbinit-proc)) -;; (inmem (db:open-inmem-db dbinit-proc)) -;; (dbdat (make-dbr:dbdat -;; db: #f ;; db -;; inmem: inmem -;; ;; run-id: run-id ;; no can do, there are many run-id values that point to single db -;; fname: dbfile))) -;; ;; now sync the disk file data into the inmemory db -;; (db:sync-tables (db:sync-all-tables-list) '("last_update" . 0) db inmem) -;; (sqlite3:finalize! db) ;; open and close every sync -;; dbdat)) ;; open the disk database file ;; NOTE: May need to add locking to file create process here ;; returns an sqlite3 database handle ;; @@ -501,11 +491,20 @@ (define (db:setup db-file) ;; run-id) (assert *toppath* "FATAL: db:setup called before toppath is available.") (let* ((dbstruct (or *dbstruct-db* (make-dbr:dbstruct)))) (db:get-dbdat dbstruct *toppath* db-file) (if (not *dbstruct-db*)(set! *dbstruct-db* dbstruct)) + (assert (db:check-setup dbstruct *toppath* db-file) "FATAL: db:setup did NOT complete properly") dbstruct)) + +(define (db:check-setup dbstruct apath dbfile) + (let* ((dbdat (db:get-dbdat dbstruct apath dbfile)) + (dbfullname (conc apath "/" dbfile)) + (db (dbr:dbdat-db dbdat)) ;; (db:open-run-db dbfullname db:initialize-db)) ;; + (inmem (dbr:dbdat-inmem dbdat))) + (and (sqlite3:database? db) + (sqlite3:database? inmem)))) ;;====================================================================== ;; setting/getting a lock on the db for only one server per db ;; ;; NOTE: @@ -693,33 +692,36 @@ ;; NOTE: touched logic is disabled/not done ;; sync run to disk if touched ;; (define (db:sync-inmem->disk dbstruct apath dbfile #!key (force-sync #f)) - #f) ;; disabled -;; (let* ((dbdat (db:get-dbdat dbstruct apath dbfile)) -;; (dbfullname (conc apath "/" dbfile)) -;; (db (db:open-run-db dbfullname db:initialize-db)) ;; (dbr:dbdat-db dbdat)) -;; (inmem (dbr:dbdat-inmem dbdat)) -;; (start-t (current-seconds)) -;; (last-update (dbr:dbdat-last-write dbdat)) -;; (last-sync (dbr:dbdat-last-sync dbdat))) -;; (debug:print-info 0 *default-log-port* "Syncing for dbfile: "dbfile", last-update: "last-update", last-sync: "last-sync) -;; (mutex-lock! *db-multi-sync-mutex*) -;; (let* ((update_info (cons "last_update" (if force-sync 0 last-update))) ;; "last_update")) -;; (need-sync (or force-sync (>= last-update last-sync)))) -;; (if need-sync -;; (begin -;; (db:sync-tables (db:sync-all-tables-list) update_info inmem db) -;; (dbr:dbdat-last-sync-set! dbdat start-t)) -;; (debug:print 0 *default-log-port* "Skipping sync as nothing touched."))) -;; (sqlite3:finalize! db) -;; (mutex-unlock! *db-multi-sync-mutex*))) - + (let* ((dbdat (db:get-dbdat dbstruct apath dbfile)) + (dbfullname (conc apath "/" dbfile)) + (db (dbr:dbdat-db dbdat)) ;; (db:open-run-db dbfullname db:initialize-db)) ;; + (inmem (dbr:dbdat-inmem dbdat)) + (start-t (current-seconds)) + (last-update (dbr:dbdat-last-write dbdat)) + (last-sync (dbr:dbdat-last-sync dbdat))) + (if (and (sqlite3:database? db) + (sqlite3:database? inmem)) + (begin + (debug:print-info 0 *default-log-port* "Syncing for dbfile: "dbfile", last-update: "last-update", last-sync: "last-sync) + (mutex-lock! *db-multi-sync-mutex*) + (let* ((update_info (cons "last_update" (if force-sync 0 last-update))) ;; "last_update")) + (need-sync (or force-sync (>= last-update last-sync)))) + (if need-sync + (begin + (db:sync-tables (db:sync-all-tables-list) update_info inmem db) + (dbr:dbdat-last-sync-set! dbdat start-t)) + (debug:print 0 *default-log-port* "Skipping sync as nothing touched."))) + ;; (sqlite3:finalize! db) + (mutex-unlock! *db-multi-sync-mutex*)) + (debug:print-info 0 *default-log-port* "Skipping sync due to databases not being open.")))) + ;; TODO: Add final sync to this ;; -#;(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) +(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) (if (<= try-num 0) #f (handle-exceptions exn (begin @@ -733,11 +735,11 @@ (sqlite3:finalize! db) #t) #f)))) ;; close all opened run-id dbs -#;(define (db:close-all dbstruct) +(define (db:close-all dbstruct) (assert (dbr:dbstruct? dbstruct) "FATAL: db:close-all called with dbstruct not set up.") (handle-exceptions exn (begin (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) @@ -1053,11 +1055,15 @@ (start-time (current-milliseconds)) (tot-count 0)) (for-each ;; table (lambda (tabledat) - (db:sync-one-table fromdb todb tabledat last-update numrecs)) + (condition-case + (db:sync-one-table fromdb todb tabledat last-update numrecs) + ;; if db is busy, take a break and try one more time + (exn (busy)(thread-sleep! 0.5) + (db:sync-one-table fromdb todb tabledat last-update numrecs)))) tbls) (let* ((runtime (- (current-milliseconds) start-time)) (should-print (or (debug:debug-mode 12) (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. @@ -1712,13 +1718,13 @@ state TEXT DEFAULT 'new', status TEXT DEFAULT 'n/a', archive_type TEXT DEFAULT 'bup', du INTEGER, archive_path TEXT);"))) - (print "creating triggers from init") - (db:create-triggers db) - db)) ;; ) + (debug:print 0 *default-log-port* "creating triggers from init") + (db:create-triggers db) + db)) ;; ) ;;====================================================================== ;; A R C H I V E S ;;====================================================================== @@ -1938,21 +1944,29 @@ (null? toplevels)) #f #t))))) (define (db:get-status-from-final-status-file run-dir) - (let ((infile (conc run-dir "/.final-status"))) + (let* ((infile (conc run-dir "/.final-status")) + (found (file-exists? infile))) ;; first verify we are able to write the output file - (if (not (file-readable? infile)) - (begin - (debug:print 0 *default-log-port* "ERROR: cannot read " infile) - (debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir) - #f - ) - (with-input-from-file infile read-lines) - ))) - + (cond + ((not found) #f) + ((and (file-exists? infile) + (not (file-readable? infile))) + (debug:print 0 *default-log-port* "ERROR: cannot read " infile) + (debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir) + #f) + (else + (let ((res (condition-case + (with-input-from-file infile read-lines) + (exn (i/o file) #f)))) + (cond + ((equal? res "#!eof") #f) + ((eof-object? res) #f) + (else res))))))) + ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); @@ -2413,18 +2427,17 @@ (if (or (null? header) (not row)) #f (let loop ((hed (car header)) (tal (cdr header)) (n 0)) - (if (equal? hed field) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" - row " header=" header " field=" field ", exn=" exn) - #f) - (vector-ref row n)) + (if (equal? hed field);;(condition-case (vector-ref #(1 2 3) 3)(exn (bounds)(print "out of bounds"))) + (condition-case + (vector-ref row n) + (exn (bounds) + (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" + row " header=" header " field=" field ", exn=" exn) + #f)) (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) ;; Accessors for the header/data structure ;; get rows and header from (define (db:get-header vec)(vector-ref vec 0)) @@ -2951,11 +2964,11 @@ ;; this is inconsistent with get-runs but it makes some sense. ;; (define (db:get-run-info dbstruct run-id) ;;(if (hash-table-ref/default *run-info-cache* run-id #f) ;; (hash-table-ref *run-info-cache* run-id) - (let* ((res (vector #f #f #f #f)) + (let* ((res (make-vector 11 #f)) (keys (db:get-keys dbstruct)) (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;; "area_id")) (header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) @@ -3026,12 +3039,10 @@ (define (db:set-run-state-status dbstruct run-id state status ) (db:with-db dbstruct #f #f (lambda (db) (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) - - (define (db:get-run-status dbstruct run-id) (let ((res "n/a")) (db:with-db dbstruct #f #f @@ -3445,21 +3456,21 @@ run-id)))) ;; NOT IN (SELECT id FROM runs WHERE state='deleted');") ;; NEW BEHAVIOR: Look only at single run with run-id ;; ;; (define (db:get-running-stats dbstruct run-id) -(define (db:get-count-tests-running-for-run-id dbstruct run-id) ;; fastmode) - (let* ((qry ;; (if fastmode - ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;" - "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; ) +(define (db:get-count-tests-running-for-run-id dbstruct run-id) + (let* ((qry "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; ) (db:with-db dbstruct run-id #f (lambda (db) - (let* ((stmth (db:get-cache-stmth dbstruct db qry))) - (sqlite3:first-result stmth run-id)))))) + (let* (#;(stmth (db:get-cache-stmth dbstruct db qry))) + #;(sqlite3:first-result stmth run-id) + (sqlite3:first-result db qry run-id) + ))))) ;; For a given testname how many items are running? Used to determine ;; probability for regenerating html ;; (define (db:get-count-tests-running-for-testname dbstruct run-id testname) @@ -4176,14 +4187,14 @@ (tl-test-id (if tl-testdat (db:test-get-id tl-testdat) #f))) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (db:general-call dbstruct 'set-test-start-time run-id (list test-id))) - ;; (mutex-lock! *db-transaction-mutex*) (db:with-db dbstruct run-id #f (lambda (db) + (mutex-lock! *db-transaction-mutex*) (let ((tr-res (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction @@ -4202,11 +4213,11 @@ state-status-counts))); end debug:print (if tl-test-id (db:db-test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct )))))) - ;; (mutex-unlock! *db-transaction-mutex*) + (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) tr-res))) ;; this was moved out of test-set-state-status (mt:process-triggers dbstruct run-id test-id state status))) @@ -4269,27 +4280,30 @@ ;; NB// Pass the db so it is part of the transaction (list newstate newstatus)))) (define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) - ;; (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id)) - (state-statuses (db:roll-up-rules state-status-counts #f #f )) - (newstate (car state-statuses)) - (newstatus (cadr state-statuses))) - (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) - (db:set-run-state-status dbstruct run-id newstate newstatus ))))))) - ;; (mutex-unlock! *db-transaction-mutex*) - tr-res)))) - + (db:with-db + dbstruct #f #f + (lambda (db) + (mutex-lock! *db-transaction-mutex*) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id)) + (state-statuses (db:roll-up-rules state-status-counts #f #f )) + (newstate (car state-statuses)) + (newstatus (cadr state-statuses))) + (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) + (begin + (db:set-run-state-status dbstruct run-id newstate newstatus) + #t) ;; changes made + #f) ;; no changes + ))))) + (mutex-unlock! *db-transaction-mutex*) + tr-res)))) (define (db:get-all-state-status-counts-for-run dbstruct run-id) (let* ((test-count-recs (db:with-db dbstruct #f #f (lambda (db) @@ -4844,14 +4858,11 @@ exn (begin (debug:print 0 *default-log-port* "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl ", exn=" exn) res) - (string-substitute patt repl res)) - - - ) + (string-substitute patt repl res))) (begin (debug:print 0 *default-log-port* "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) res)))) (if (null? tal) @@ -5836,43 +5847,49 @@ (define (db:register-server dbstruct host port servkey pid ipaddr apath dbname) (db:with-db dbstruct #f #f (lambda (db) + (mutex-lock! *db-transaction-mutex*) (sqlite3:with-transaction db (lambda () - (let* ((sinfo (db:get-server-info dbstruct apath dbname))) - (if sinfo - (begin - (debug:print-info 0 *default-log-port* "Server already running at "sinfo ", while trying to register server " host":"port) - #f) ;; server already registered - (begin - (sqlite3:execute db "INSERT INTO servers (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);" - host port servkey pid ipaddr apath dbname) - (db:get-server-info dbstruct apath dbname))))))))) + (let* ((sinfo (db:get-server-info dbstruct apath dbname)) + (res (if sinfo + (begin + (debug:print-info 0 *default-log-port* "Server already running at "sinfo ", while trying to register server " host":"port) + #f) ;; server already registered + (begin + (sqlite3:execute db "INSERT INTO servers (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);" + host port servkey pid ipaddr apath dbname) + (db:get-server-info dbstruct apath dbname))))) + (mutex-unlock! *db-transaction-mutex*) + res)))))) ;; run this one in a transaction where first check if host:port is taken (define (db:deregister-server dbstruct host port servkey pid ipaddr apath dbname) (db:with-db dbstruct #f #f (lambda (db) - (sqlite3:with-transaction - db - (lambda () - (let* ((sinfo (db:get-server-info dbstruct apath dbname))) - (if (not sinfo) - (begin - (debug:print-info 0 *default-log-port* "Server already removed for "apath", "dbname) ;; at "sinfo ", while trying to register server " host":"port) - #f) ;; server already deregistered - (begin - (sqlite3:execute db "DELETE FROM servers WHERE apath=? AND dbname=?;" ;; (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);" - ;; host port servkey pid ipaddr - apath dbname) - #;(db:get-server-info dbstruct apath dbname) - 'done)))))))) + (mutex-lock! *db-transaction-mutex*) + (let ((res (sqlite3:with-transaction + db + (lambda () + (let* ((sinfo (db:get-server-info dbstruct apath dbname))) + (if (not sinfo) + (begin + (debug:print-info 0 *default-log-port* "Server already removed for "apath", "dbname) ;; at "sinfo ", while trying to register server " host":"port) + #f) ;; server already deregistered + (begin + (sqlite3:execute db "DELETE FROM servers WHERE apath=? AND dbname=?;" ;; (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);" + ;; host port servkey pid ipaddr + apath dbname) + #;(db:get-server-info dbstruct apath dbname) + 'done))))))) + (mutex-unlock! *db-transaction-mutex*) + res)))) (define (db:get-server-info dbstruct apath dbname) (db:with-db dbstruct #f #f Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -26,10 +26,11 @@ (declare (uses commonmod)) (declare (uses configfmod)) (declare (uses rmtmod)) (declare (uses mtargs)) (declare (uses testsmod)) +(declare (uses dbmgrmod)) (module dcommon * (import scheme @@ -62,10 +63,11 @@ srfi-1 ) (import mtver dbmod + dbmgrmod commonmod debugprint configfmod rmtmod ;; gutils @@ -547,11 +549,13 @@ (if (and (hash-table? src-ht)(hash-table? trg-ht)) (begin (hash-table-clear! trg-ht) (for-each (lambda (testdat) - (hash-table-set! trg-ht (test:test-get-fullname testdat) testdat)) + (if (test:testdat? testdat) + (hash-table-set! trg-ht (test:test-get-fullname testdat) testdat) + (debug:print 0 *default-log-port* "WARNING: invalid testdat record: "testdat))) (hash-table-values src-ht))) (debug:print 0 *default-log-port* "WARNING: src-ht " src-ht " trg-ht " trg-ht)))) ;;====================================================================== Index: debugprint.scm ================================================================== --- debugprint.scm +++ debugprint.scm @@ -6,10 +6,12 @@ ;;(import scheme chicken data-structures extras files ports) (import scheme chicken.base chicken.string + chicken.time + chicken.time.posix chicken.port chicken.process-context chicken.process-context.posix (prefix mtargs args:) @@ -21,20 +23,20 @@ ;; debug stuff ;;====================================================================== (define verbosity (make-parameter '())) (define *default-log-port* (current-error-port)) -(define debug:print-logger (make-parameter #f)) ;; se to a proc to call on every logging print +(define debug:print-logger (make-parameter #f)) ;; set to a proc to call on every logging print (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") (args:get-arg "-debug-noprop") (get-environment-variable "MT_DEBUG_MODE")))) (verbosity (debug:calc-verbosity debugstr 'q)) (debug:check-verbosity (verbosity) debugstr) ;; if we were handed a bad verbosity rule then we will override it with 1 and continue - (if (verbosity)(verbosity 1)) + (if (not (verbosity))(verbosity 1)) (if (and (not (args:get-arg "-debug-noprop")) (or (args:get-arg "-debug") (not (get-environment-variable "MT_DEBUG_MODE")))) (set-environment-variable! "MT_DEBUG_MODE" (if (list? (verbosity)) (string-intersperse (map conc (verbosity)) ",") @@ -109,48 +111,56 @@ (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " (string-intersperse (map conc params) " ") "; " (string-intersperse (command-line-arguments) " "))))) -(define (debug:print n e . params) +(define debug:enable-timestamp (make-parameter #t)) + +(define (debug:timestamp) + (if (debug:enable-timestamp) + (conc (time->string + (seconds->local-time (current-seconds)) "%H:%M:%S") " ") + "")) + + (define (debug:print n e . params) (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) (lambda () ;; (if *logging* ;; (db:log-event (apply conc params)) - (apply print params) - (debug:handle-remote-logging params) + (apply print (debug:timestamp) params) + ;; (debug:handle-remote-logging params) ))) #t ;; only here to make remote stuff happy. It'd be nice to fix that ... ) (define (debug:print-error n e . params) ;; normal print (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () - (apply print "ERROR: " params) - (debug:handle-remote-logging (cons "ERROR: " params)) + (apply print "ERROR: " (debug:timestamp) params) + ;; (debug:handle-remote-logging (cons "ERROR: " params)) ))) ;; pass important messages to stderr (if (and (eq? n 0)(not (eq? e (current-error-port)))) (with-output-to-port (current-error-port) (lambda () - (apply print "ERROR: " params) + (apply print "ERROR: " (debug:timestamp) params) )))) (define (debug:print-info n e . params) (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () - (apply print "INFO: (" n ") " params) ;; res) - (debug:handle-remote-logging (cons "INFO: " params)) + (apply print "INFO: (" n ") "(debug:timestamp) params) ;; res) + ;; (debug:handle-remote-logging (cons "INFO: " params)) )))) (define (debug:print-warn n e . params) (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () - (apply print "WARN: (" n ") " params) ;; res) - (debug:handle-remote-logging (cons "WARN: " params)) + (apply print "WARN: (" n ") " (debug:timestamp) params) ;; res) + ;; (debug:handle-remote-logging (cons "WARN: " params)) )))) ) DELETED hostinfo.scm Index: hostinfo.scm ================================================================== --- hostinfo.scm +++ /dev/null @@ -1,23 +0,0 @@ -;;====================================================================== -;; Copyright 2019, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - -;;====================================================================== - -(declare (unit hostinfo)) - -(include "hostinfo/hostinfo.scm") DELETED hostinfo/hostinfo.h Index: hostinfo/hostinfo.h ================================================================== --- hostinfo/hostinfo.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifdef _WIN32 -# include -# include - -const char *inet_ntop(int af, const void *src, char *dst, socklen_t cnt) -{ - if (af == AF_INET) - { - struct sockaddr_in in; - memset(&in, 0, sizeof(in)); - in.sin_family = AF_INET; - memcpy(&in.sin_addr, src, sizeof(struct in_addr)); - getnameinfo((struct sockaddr *)&in, sizeof(struct -sockaddr_in), dst, cnt, NULL, 0, NI_NUMERICHOST); - return dst; - } - else if (af == AF_INET6) - { - struct sockaddr_in6 in; - memset(&in, 0, sizeof(in)); - in.sin6_family = AF_INET6; - memcpy(&in.sin6_addr, src, sizeof(struct in_addr6)); - getnameinfo((struct sockaddr *)&in, sizeof(struct -sockaddr_in6), dst, cnt, NULL, 0, NI_NUMERICHOST); - return dst; - } - return NULL; -} - -int inet_pton(int af, const char *src, void *dst) -{ - struct addrinfo hints, *res, *ressave; - - memset(&hints, 0, sizeof(struct addrinfo)); - hints.ai_family = af; - - if (getaddrinfo(src, NULL, &hints, &res) != 0) - { - return -1; - } - - ressave = res; - - while (res) - { - memcpy(dst, res->ai_addr, res->ai_addrlen); - res = res->ai_next; - } - - freeaddrinfo(ressave); - return 0; -} - -#else - # include - # include - # include /* in_addr */ -# include /* inet_ntop, ... */ -# include /* hostent, gethostby* */ -# include -#endif DELETED hostinfo/hostinfo.meta Index: hostinfo/hostinfo.meta ================================================================== --- hostinfo/hostinfo.meta +++ /dev/null @@ -1,9 +0,0 @@ -;;; hostinfo.meta -*- Hen -*- -((synopsis "Look up host, protocol, and service information") - (author "Jim Ursetto") - (needs vector-lib foreigners) - (egg "hostinfo.egg") - (files "hostinfo.h" "hostinfo.meta" "hostinfo.scm" "hostinfo.setup") - (license "BSD") - (doc-from-wiki) - (category net)) DELETED hostinfo/hostinfo.scm Index: hostinfo/hostinfo.scm ================================================================== --- hostinfo/hostinfo.scm +++ /dev/null @@ -1,489 +0,0 @@ -;;; hostinfo extension to Chicken Scheme -;;; Description: Look up host, service, and protocol information - -;; Copyright (c) 2005-2008, Jim Ursetto. All rights reserved. -;; -;; Redistribution and use in source and binary forms, with or without -;; modification, are permitted provided that the following conditions are met: -;; -;; Redistributions of source code must retain the above copyright notice, -;; this list of conditions and the following disclaimer. Redistributions in -;; binary form must reproduce the above copyright notice, this list of -;; conditions and the following disclaimer in the documentation and/or -;; other materials provided with the distribution. Neither the name of the -;; author nor the names of its contributors may be used to endorse or -;; promote products derived from this software without specific prior -;; written permission. -;; -;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -;; THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -;; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR -;; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -;;; - -;; This extension performs host, protocol and service information lookups -;; via underlying calls to gethostbyname(3), getprotobyname(3), and -;; getservbyname(3). Depending on your system, this may consult DNS, -;; NIS, /etc/hosts, /etc/services, /etc/protocols, and so on. - -;; A simple interface is provided for the most commmon queries. Also -;; provided is a more comprehensive interface using records, which -;; contain all data available in a lookup. - -;; IP addresses are represented by 4 (IPv4) or 16 (IPv6) byte -;; u8vectors. The interface requires, and returns, addresses in this -;; format; functions are provided to convert between the string and -;; u8vector representations. However, the "do what I want" procedures -;; (e.g. host-information) will do the conversion for you. - -;; Caveats: -;; - IPv6 addresses can be converted to and from strings, and the underlying structure -;; supports IPv6, but lookup of IPv6 addresses and records is not currently implemented. -;; - array0->string-vector and array0->bytevector-vector contain redundant code. -;; - host, services, and protocol-information check their argument types, even -;; though the underlying code already does it. - -(declare - (fixnum)) - -(cond-expand [paranoia] - [else - (declare (no-bound-checks))]) - -#> #include "../hostinfo/hostinfo.h" <# - -;; (require-extension srfi-4 lolevel posix) - -(module hostinfo -;;; Short and sweet lookups - (current-hostname - hostname->ip ip->hostname - protocol-name->number protocol-number->name - service-port->name service-name->port -;;; Entire host, protocol or service record lookup - hostname->hostinfo ip->hostinfo - protocol-name->protoinfo protocol-number->protoinfo - service-port->servinfo service-name->servinfo -;;; Record accessors and predicates - hostinfo? hostinfo-name hostinfo-aliases hostinfo-addresses - hostinfo-address hostinfo-type hostinfo-length - protoinfo? protoinfo-name protoinfo-aliases protoinfo-number - servinfo? servinfo-name servinfo-aliases servinfo-port servinfo-protocol -;;; One-stop shops -- does what you want - host-information protocol-information service-information -;;; Utilities - string->ip ip->string) - - (import chicken.fixnum chicken.string chicken.blob srfi-2 scheme - typed-records srfi-9 chicken.foreign srfi-4 chicken.base - foreigners system-information - chicken.format) - - (define (vector-map p v0) ; to avoid linking in vector-lib - (let* ((len (vector-length v0)) - (v (make-vector len))) - (do ((i 0 (+ i 1))) - ((>= i len) v) - (vector-set! v i - (p i (vector-ref v0 i)))))) - - (cond-expand [unsafe - (eval-when (compile) - (define-inline (##sys#check-string . r) - (##core#undefined))) ] - [else]) - -;;; C data structure conversions - - (define (c-pointer->blob ptr len) - (let ((bv (make-blob len)) - (memcpy (foreign-lambda bool "C_memcpy" blob c-pointer integer))) - (memcpy bv ptr len) - bv)) - -;; Convert from null-terminated array of c-strings to vector of strings. -;; These functions use C_alloc and so are not suitable for large datasets. -;; Note: get_argv_2 of runtime.c shows how to build a list instead of a vector (in reverse). - (define array0->string-vector - (foreign-primitive scheme-object (((c-pointer "char *") list)) " - char **p; int len = 0; - C_word *a, vec, *elt; - - for (p = list; *p; ++p, ++len); - - a = C_alloc(C_SIZEOF_VECTOR(len)); - vec = (C_word)a; - *a++ = C_make_header(C_VECTOR_TYPE, len); - - for (p = list; *p; ++p) { - len = strlen(*p); - elt = C_alloc(C_SIZEOF_STRING(len)); - /* Both C_mutate and *a++ = seem to work fine here. */ - C_mutate(a++, C_string(&elt, len, *p)); - } - return(vec);" - )) - - ;; Convert from null-terminated array of IP addresses to vector of strings. - (define array0->bytevector-vector - (foreign-primitive scheme-object (((c-pointer "char *") list) (integer addrlen)) " - char **p; int len = 0; - C_word *a, vec, *elt; - - for (p = list; *p; ++p, ++len); - - a = C_alloc(C_SIZEOF_VECTOR(len)); - vec = (C_word)a; - *a++ = C_make_header(C_VECTOR_TYPE, len); - - for (p = list; *p; ++p) { - elt = C_alloc(C_SIZEOF_STRING(addrlen)); - C_mutate(a++, C_bytevector(&elt, addrlen, *p)); - } - return(vec);" - )) - - ;; Not currently used. Could make the array0-> stuff somewhat cleaner. - ;; (define array0-length - ;; (foreign-lambda* integer (((pointer "void *") list)) #<ip conversion - - ;; inet_pton does not like "127.1", nor "0", nor any other non-standard - ;; representation of IP addresses. This is specified by RFC2553. - ;; inet_aton resolves these addresses. We use inet_pton here. - - (define-foreign-variable inet4-addrstrlen integer "INET_ADDRSTRLEN") - (define-foreign-variable inet6-addrstrlen integer "INET6_ADDRSTRLEN") - (define-foreign-variable af-inet integer "AF_INET") - (define-foreign-variable af-inet6 integer "AF_INET6") - - (define inet-ntop (foreign-lambda c-string "inet_ntop" integer u8vector c-string integer)) - (define inet-pton (foreign-lambda* bool ((integer type) (c-string src) (blob dest)) - "return(inet_pton(type, src, dest) == 1);")) - - (define (string->ip4 str) - (##sys#check-string str 'string->ip4) - (let ((bv (make-blob 4))) - (and (inet-pton af-inet str bv) - (blob->u8vector bv)))) - - (define (string->ip6 str) - (##sys#check-string str 'string->ip6) - (let ((bv (make-blob 16))) - (and (inet-pton af-inet6 str bv) - (blob->u8vector bv)))) - - (define (string->ip str) - (or (string->ip4 str) - (string->ip6 str))) - -;;; ip->string conversion - - (define (ip4->string addr) - (let ((len inet4-addrstrlen)) - (inet-ntop af-inet addr (make-string len) len))) - - (define (ip6->string addr) - (let ((len inet6-addrstrlen)) - (inet-ntop af-inet6 addr (make-string len) len))) - - ;; Take an IPv4 or IPv6 u8vector and convert it into the - ;; appropriate string representation, via inet_ntop. - (define (ip->string addr) - (let ((len (u8vector-length addr))) - (cond ((fx= len 4) (ip4->string addr)) - ((fx= len 16) (ip6->string addr)) - (else - (error "Invalid IP address length" addr))))) - -;;; hostent raw structure - - (define-foreign-record-type (hostent "struct hostent") - (c-string h_name hostent-name) - (c-pointer h_aliases hostent-h_aliases) - (integer h_addrtype hostent-addrtype) - (integer h_length hostent-length) - (c-pointer h_addr_list hostent-addr-list)) - - ;; Some convenient accessors for the raw hostent structure--with raw c pointers - ;; converted to the appropriate scheme objects. We only use these once or twice - ;; below, so their main advantage is clarity. - (define (hostent-aliases h) - (array0->string-vector (hostent-h_aliases h))) - (define (hostent-address h) - (let* ((get-addr (foreign-lambda* c-pointer ((hostent h)) "return(h->h_addr_list[0]);")) - (addr (get-addr h))) - (blob->u8vector - (c-pointer->blob addr (hostent-length h))))) - (define (hostent-addresses h) - (vector-map (lambda (i x) (blob->u8vector x)) - (array0->bytevector-vector (hostent-addr-list h) - (hostent-length h)))) - ;; The IPv6 equivalents of these are getipnodebyname and - ;; getipnodebyaddr. - (define gethostent/name (foreign-lambda hostent "gethostbyname" c-string)) - - (define (gethostent/addr addr) - (if (fx= (u8vector-length addr) 4) - (gethostent/addr/bv (u8vector->blob addr)) - (error "invalid IP address length; only IPv4 supported" addr))) - - ;; Warning: handle IPv6!! - (define gethostent/addr/bv (foreign-lambda* hostent ((blob addr)) - "return(gethostbyaddr((const char *)addr, 4, AF_INET));")) - - ;; This was originally made a macro so we could easily return multiple - ;; values -- but we're now returning a hostinfo structure. Eh. - (define (hostent->hostinfo h) - (make-hostinfo (hostent-name h) - (hostent-addresses h) - (hostent-aliases h))) - -;;; hostinfo and host information - - ;; The standard host name for the current processor. - ;; Gets & Sets, error otherwise. - - (define set-host-name! - (foreign-lambda* int ((c-string name)) - "return(sethostname(name, strlen(name)));")) - - (define (current-hostname . args) - (if (null? args) - (get-host-name) - (and (zero? (set-host-name! (->string (car args)))) - (error 'current-hostname "cannot set hostname")))) - - ;; Structure accessors created by define-foreign-record do not intercept - ;; NULL pointer input, including #f. - (define (hostname->ip host) - (and-let* ((h (gethostent/name host))) - (hostent-address h))) - - (define (hostname->hostinfo host) - (and-let* ((h (gethostent/name host))) - (hostent->hostinfo h))) - - (define (ip->hostname addr) - (and-let* ((h (gethostent/addr addr))) - (hostent-name h))) - - (define (ip->hostinfo addr) - (and-let* ((h (gethostent/addr addr))) - (hostent->hostinfo h))) - - ;; A simple hostinfo structure. - (define-record-type hostinfo - (make-hostinfo name addresses aliases) - hostinfo? - (name hostinfo-name) - (addresses hostinfo-addresses) - (aliases hostinfo-aliases)) - - ;; "Accessors" for phantom fields. - ;; We don't need to store length or type, as these are artifacts - ;; of the C implementation, and can be derived from the address itself. - (define (hostinfo-address h) (vector-ref (hostinfo-addresses h) 0)) - (define (hostinfo-length h) (u8vector-length (hostinfo-address h))) - (define (hostinfo-type h) - (let ((len (u8vector-length (hostinfo-address h)))) - (cond ((fx= len 4) 'AF_INET) ;; Kind of a dummy implementation-- - ((fx= len 16) 'AF_INET6) ;; not sure what value would be appropriate - (else - (error "Invalid IP address length" (hostinfo-address h)))))) - - ;; Format the structure for easy interactive viewing--should be possible to - ;; add a ctor for this representation, though it's not clear why you'd want to. - (define-record-printer (hostinfo h port) - (fprintf port "#,(hostinfo name: ~S addresses: ~S aliases: ~S)" - (hostinfo-name h) (hostinfo-addresses h) (hostinfo-aliases h))) - - ;; Warning: lookup of an IP address which is invalid yet numeric will - ;; return a false positive. Bug in gethostbyname? - ;; E.g. (hostname->hostinfo "1") => #,(hostinfo name: "1" addresses: (#u8(0 0 0 1))) - ;; ** If we used inet_aton for string->ip, then these cases would - ;; be transformed into u8vector IPs, and the lookup would correctly fail. - - ;; Return a hostinfo record. HOST is a u8vector IP address, a string - ;; hostname, or a string numeric IP address. - (define (host-information host) - (if (u8vector? host) - (ip->hostinfo host) - (begin - (##sys#check-string host 'host-information) - (cond ((string->ip host) => ip->hostinfo) - (else (hostname->hostinfo host)))))) - -;;; protocols - - (define-foreign-record-type (protoent "struct protoent") - (c-string p_name protoent-name) - (c-pointer p_aliases protoent-p_aliases) - (integer p_proto protoent-proto)) - - (define getprotoent/name (foreign-lambda protoent "getprotobyname" c-string)) - (define getprotoent/number (foreign-lambda protoent "getprotobynumber" integer)) - - ;; Raw structure -> scheme-object accessors - (define (protoent-aliases p) - (array0->string-vector (protoent-p_aliases p))) - - (define-record-type protoinfo - (make-protoinfo name number aliases) - protoinfo? - (name protoinfo-name) - (number protoinfo-number) - (aliases protoinfo-aliases)) - - (define-record-printer (protoinfo p port) - (fprintf port "#,(protoinfo name: ~S number: ~S aliases: ~S)" - (protoinfo-name p) (protoinfo-number p) (protoinfo-aliases p))) - - (define (protocol-name->number name) - (and-let* ((p (getprotoent/name name))) - (protoent-proto p))) - (define (protocol-number->name nr) - (and-let* ((p (getprotoent/number nr))) - (protoent-name p))) - - (define (protoent->protoinfo p) - (make-protoinfo (protoent-name p) - (protoent-proto p) - (protoent-aliases p))) - - (define (protocol-name->protoinfo name) - (and-let* ((p (getprotoent/name name))) - (protoent->protoinfo p))) - (define (protocol-number->protoinfo nr) - (and-let* ((p (getprotoent/number nr))) - (protoent->protoinfo p))) - - (define (protocol-information proto) - (if (fixnum? proto) - (protocol-number->protoinfo proto) - (begin - (##sys#check-string proto 'protocol-information) - (protocol-name->protoinfo proto)))) - -;;; services - - (define-foreign-type port-number int - (foreign-lambda int "htons" int) - (foreign-lambda int "ntohs" int) ) - - (define-foreign-record-type (servent "struct servent") - (c-string s_name servent-name) - (c-pointer s_aliases servent-s_aliases) - (port-number s_port servent-port) - (c-string s_proto servent-proto)) - - (define (servent->servinfo s) - (make-servinfo (servent-name s) - (servent-port s) - (array0->string-vector - (servent-s_aliases s)) - (servent-proto s))) - - (define getservent/name (foreign-lambda servent "getservbyname" c-string c-string)) - (define getservent/port (foreign-lambda servent "getservbyport" port-number c-string)) - - (define-record-type servinfo - (make-servinfo name port aliases protocol) - servinfo? - (name servinfo-name) - (port servinfo-port) - (aliases servinfo-aliases) - (protocol servinfo-protocol)) - - (define-record-printer (servinfo s port) - (fprintf port "#,(servinfo name: ~S port: ~S aliases: ~S protocol: ~S)" - (servinfo-name s) (servinfo-port s) (servinfo-aliases s) (servinfo-protocol s))) - - ;; If provided with the optional protocol argument (a string), these will - ;; restrict their search to that protocol. - (define (service-name->port name . pr) - (let-optionals pr ((proto #f)) - (and-let* ((s (getservent/name name proto))) - (servent-port s)))) - (define (service-port->name port . pr) - (let-optionals pr ((proto #f)) - (and-let* ((s (getservent/port port proto))) - (servent-name s)))) - (define (service-name->servinfo name . pr) - (let-optionals pr ((proto #f)) - (and-let* ((s (getservent/name name proto))) - (servent->servinfo s)))) - (define (service-port->servinfo port . pr) - (let-optionals pr ((proto #f)) - (and-let* ((s (getservent/port port proto))) - (servent->servinfo s)))) - - ;; Return service information given a service name or port, and an - ;; optional protocol name or number to restrict the search to. - ;; Note: if the protocol-number->name lookup fails, - ;; an error is thrown, as this was probably not intended. - (define (service-information service . pr) - (let-optionals pr ((proto #f)) - (let ((proto (if (fixnum? proto) - (or (protocol-number->name proto) - (error 'service-information "illegal protocol number" proto)) - proto))) - (if (fixnum? service) - (service-port->servinfo service proto) - (begin - (##sys#check-string service 'service-information) - (service-name->servinfo service proto)))))) -) ; end module - -;;; Tests -(cond-expand - [testing - (import hostinfo) - (current-hostname) - (host-information "www.call-with-current-continuation.org") - (host-information '#u8(194 97 107 133)) - (host-information "194.97.107.133") - ; => #,(hostinfo name: "www003.lifemedien.de" addresses: #(#u8(194 97 107 133)) - ; aliases: #("www.call-with-current-continuation.org")) - (ip->hostname '#u8(194 97 107 133)) ; "www003.lifemedien.de" - (string->ip "0708::0901") ; #u8(7 8 0 0 0 0 0 0 0 0 0 0 0 0 9 1) - (ip->string '#u8(127 0 0 1)) ; "127.0.0.1" - (hostinfo-aliases - (hostname->hostinfo - (ip->hostname (hostname->ip - (hostinfo-name - (host-information "www.call-with-current-continuation.org")))))) - ; => #("www.call-with-current-continuation.org") - - (protocol-information 17) ; => #,(protoinfo name: "udp" number: 17 aliases: #("UDP")) - (protoinfo-name (protocol-information 2)) ; => "igmp" - (protoinfo-aliases (protocol-name->protoinfo - (protocol-number->name - (protoinfo-number - (protocol-information "ospf"))))) ; => #("OSPFIGP") - (protocol-name->number "OSPFIGP") ; 89 (you can look up aliases, too) - - (servinfo-protocol (service-name->servinfo - (service-port->name - (servinfo-port (service-information "ssh"))))) ; => "udp" (yes, really) - (service-information "ssh" "tcp") ; => #,(servinfo name: "ssh" port: 22 aliases: #() protocol: "tcp") - (service-information "ssh" "tco") ; => #f - (service-information 512 "tcp") ; #,(servinfo name: "exec" port: 512 aliases: #() protocol: "tcp") - (service-information 512 "udp") ; #,(servinfo name: "comsat" port: 512 aliases: #("biff") protocol: "udp") - (service-information 512 17) ; same as previous - (service-information 512 170000) ; Error: (service-information) illegal protocol number: 170000 - ] [else]) DELETED hostinfo/hostinfo.setup Index: hostinfo/hostinfo.setup ================================================================== --- hostinfo/hostinfo.setup +++ /dev/null @@ -1,11 +0,0 @@ -(define libs - (if (eq? (build-platform) 'msvc) - "-lws2_32" - "") ) - -(compile -s -O2 -d2 hostinfo.scm ,libs -j hostinfo) -(compile -s -O2 -d0 hostinfo.import.scm) -(install-extension - 'hostinfo - '("hostinfo.so" "hostinfo.import.so") - '((version "1.4.1"))) Index: launchmod.scm ================================================================== --- launchmod.scm +++ launchmod.scm @@ -663,11 +663,12 @@ (let* ((tconfig-fname (conc work-area "/.testconfig")) (tconfig-tmpfile (conc tconfig-fname ".tmp")) (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) (scripts (configf:get-section tconfig "scripts"))) ;; create .testconfig file - (configf:write-alist tconfig tconfig-tmpfile) + (configf:write-alist tconfig tconfig-tmpfile #t) ;; the #t forces a check of the written data + (assert (file-exists? tconfig-tmpfile) "FATAL: We just wrote the dang file, how can it not exist?") (move-file tconfig-tmpfile tconfig-fname #t) (delete-file* ".final-status") ;; extract scripts from testconfig and write them to files in test run dir (for-each @@ -1470,10 +1471,12 @@ (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) + + (setenv "MT_CMDINFO" cmdparms) ;; setting this for use in nblauncher ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway (if (common:file-exists? work-area) @@ -1888,11 +1891,11 @@ ;; 0 RUNNING ==> this is actually the first condition, should not get here (define (runs:end-of-run-check run-id ) (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) - (all-test-launched (rmt:get-var run-id (conc "lunch-complete-" run-id))) + (all-test-launched (rmt:get-var run-id (conc "launch-complete-" run-id))) (current-state (rmt:get-run-state run-id)) (current-status (rmt:get-run-status run-id))) ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing (debug:print 0 *default-log-port* "Running test cnt :" running-cnt) (rmt:set-state-status-and-roll-up-run run-id current-state current-status) @@ -1906,11 +1909,11 @@ (rmt:set-var run-id (conc "end-of-run-" run-id) "yes") ;(thread-sleep! 10) (runs:run-post-hook run-id) (debug:print 4 *default-log-port* "currseconds: " (current-seconds)" eor: " (rmt:get-var run-id (conc "end-of-run-" run-id))) (common:simple-unlock (conc "endOfRun" run-id))) - (debug:print 0 *default-log-port* "End of Run Detected but not running post hook. This should happen when eor is set to yes. This will happen only when 2 tests exit at smae time. eor= " (rmt:get-var run-id (conc "end-of-run-" run-id))))) + (debug:print 0 *default-log-port* "End of Run Detected but not running post hook. This should happen when eor is set to yes. This will happen only when 2 tests exit at same time. eor= " (rmt:get-var run-id (conc "end-of-run-" run-id))))) ((> running-cnt 3) (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) ((> running-cnt 0) (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) (let ((kill-cnt (launch:kill-tests-if-dead run-id))) @@ -1929,34 +1932,34 @@ (define (runs:find-and-mark-incomplete-and-check-end-of-run run-id ovr-deadtime) (rmt:find-and-mark-incomplete run-id ovr-deadtime) (runs:end-of-run-check run-id)) - - +;; only called if there are more than zero running tests (define (launch:kill-tests-if-dead run-id) (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (let loop ((running-test (car running-tests)) - (tal (cdr running-tests)) - (kill-cnt 0)) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11)) - (test-id (vector-ref running-test 0)) - (host (vector-ref running-test 6)) - (pid (rmt:test-get-top-process-pid run-id test-id)) - (event-time (vector-ref running-test 5)) - (duration (vector-ref running-test 12)) - (flag 0) - (curr-time (current-seconds))) - (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed - (begin - (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") + (let loop ((running-test (car running-tests)) + (tal (cdr running-tests)) + (kill-cnt 0)) + (let* ((test-name (vector-ref running-test 2)) + (item-path (vector-ref running-test 11)) + (test-id (vector-ref running-test 0)) + (host (vector-ref running-test 6)) + (pid (rmt:test-get-top-process-pid run-id test-id)) + (event-time (vector-ref running-test 5)) + (duration (vector-ref running-test 12)) + (flag 0) + (curr-time (current-seconds))) + (if (and (< (+ event-time duration 600) curr-time) + (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed + (begin + (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") (set! flag 1) (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) - (if (not (null? tal)) - (loop (car tal) (cdr tal) (+ kill-cnt flag)) - (+ kill-cnt flag)))))) + (if (not (null? tal)) + (loop (car tal) (cdr tal) (+ kill-cnt flag)) + (+ kill-cnt flag)))))) (define (runs:run-post-hook run-id) (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) (existing-tests (if run-post-hook Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -19,11 +19,11 @@ (declare (uses dbi)) (declare (uses pkts)) (declare (uses stml2)) (declare (uses cookie)) (declare (uses csv-xml)) -(declare (uses hostinfo)) +;; (declare (uses hostinfo)) (declare (uses adjutant)) (declare (uses archivemod)) (declare (uses apimod)) (declare (uses autoload)) @@ -42,10 +42,11 @@ (declare (uses processmod)) (declare (uses rmtmod)) (declare (uses runsmod)) (declare (uses servermod)) (declare (uses testsmod)) +(declare (uses dbmgrmod)) ;; needed for configf scripts, scheme etc. ;; (declare (uses apimod.import)) ;; (declare (uses debugprint.import)) ;; (declare (uses mtargs.import)) @@ -57,11 +58,11 @@ ;; (declare (uses servermod.import)) ;; (declare (uses launchmod.import)) ;; (include "call-with-environment-variables/call-with-environment-variables.scm") -(module megatest-main +(module megatest * (import scheme chicken.base chicken.bitwise @@ -130,11 +131,11 @@ ;; local modules autoload adjutant csv-xml - hostinfo + ;; hostinfo mtver mutils cookie csv-xml ducttape-lib @@ -156,16 +157,28 @@ rmtmod runsmod servermod tasksmod testsmod + dbmgrmod + ulex ) - + +;; ;; ulex parameters +;; (work-method 'direct) +;; (return-method 'direct) + + ;; ulex parameters + ;; (work-method 'mailbox) + ;; (return-method 'mailbox) + +(my-with-lock common:with-simple-file-lock) + ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) - +(define *didsomething* #f) (define *db* #f) ;; this is only for the repl, do not use in general!!!! ;; (include "common_records.scm") ;; (include "key_records.scm") ;; (include "db_records.scm") @@ -182,10 +195,12 @@ (include "diff-report.scm") (include "ods.scm") (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file + +(set! *toppath* (get-environment-variable "PWD")) ;;====================================================================== ;; Test commands (i.e. for use inside tests) ;;====================================================================== @@ -269,11 +284,11 @@ (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (rerun-cnt (if config-reruns config-reruns 1))) - + (debug:print 0 *default-log-port* "handle-run-requests *toppath* = " *toppath*) (runs:run-tests target runname #f ;; (common:args-get-testpatt #f) ;; (or (args:get-arg "-testpatt") ;; "%") @@ -319,11 +334,11 @@ (if (not (directory-exists? log-dir)) (system (conc "mkdir -p " log-dir))) (open-output-file logpath)) (exn () (debug:print-error 0 *default-log-port* "Could not open log file for write: "logpath-in) - (define *didsomething* #t) + (set! *didsomething* #t) (exit 1)))) ;; Disabled help items ;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s) ;; from prior runs with same keys @@ -503,18 +518,10 @@ Version " megatest-version ", built from " megatest-fossil-hash )) (define (main) (make-and-init-bigdata) - - ;; set up the functions in http transport - (hash-table-set! *http-functions* 'api:process-request api:process-request) - (hash-table-set! *http-functions* 'http-transport:main-page http-transport:main-page) - (hash-table-set! *http-functions* 'http-transport:show-jquery http-transport:show-jquery) - (hash-table-set! *http-functions* 'http-transport:html-test-log http-transport:html-test-log) - (hash-table-set! *http-functions* 'http-transport:html-dboard http-transport:html-dboard) - ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) (load debugcontrolf))) @@ -753,19 +760,30 @@ ;; (if (args:get-arg "-start-dir") (if (common:file-exists? (args:get-arg "-start-dir")) (let ((fullpath (common:real-path (args:get-arg "-start-dir")))) (set-environment-variable! "PWD" fullpath) - (change-directory fullpath)) + (change-directory fullpath) + (set! *toppath* fullpath)) (begin (debug:print-error 0 *default-log-port* "non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.") (exit 1)))) + + (set! *toppath* (get-environment-variable "PWD")) + + ;; immediately set MT_TARGET if -reqtarg or -target are available ;; (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) - (if targ (set-environment-variable! "MT_TARGET" targ))) + (if targ + (begin + (set-environment-variable! "MT_TARGET" targ) + (mytarget targ) + ) + ) + ) ;; The watchdog is to keep an eye on things like db sync etc. ;; ;; (init-watchdog) @@ -828,12 +846,10 @@ (if (args:get-arg "-version") (begin (print (common:version-signature)) ;; (print megatest-version) (exit))) - (define *didsomething* #f) - ;; Overall exit handling setup immediately ;; (if (or (args:get-arg "-process-reap")) ;; (args:get-arg "-runtests") ;; (args:get-arg "-execute") @@ -1067,15 +1083,17 @@ (else (pp data)))))) (if out-file (close-output-port out-port)) (exit) ;; yes, bending the rules here - need to exit since this is a utility )) - - (if (args:get-arg "-ping") + + ;; disabled for now + + #;(if (args:get-arg "-ping") (let* ((server-id (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":" (host:port (args:get-arg "-ping"))) - (server:ping (or server-id host:port) #f do-exit: #t))) + (server-ready? (or server-id host:port) #f do-exit: #t))) ;;====================================================================== ;; Capture, save and manipulate environments ;;====================================================================== @@ -2638,11 +2656,11 @@ ;;(debug:print-info 13 *default-log-port* "thread-join! watchdog") ;; join the watchdog thread if it has been thread-start!ed (it may not have been started in the case of a server that never enters running state) ;; (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead) ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage - (let* ((watchdog (bdat-watchdog *bdat*))) + #;(let* ((watchdog (bdat-watchdog *bdat*))) (if (thread? watchdog) (case (thread-state watchdog) ((ready running blocked sleeping terminated dead) (thread-join! watchdog))))) @@ -2658,11 +2676,14 @@ ((1)(exit 1)) ((2)(exit 2)) (else (exit 3))))) ) -) +;; (import megatest-main commonmod) +;; (import srfi-18) + +(thread-join! + (thread-start! + (make-thread main))) -(import megatest-main) -(import commonmod) -(main) +) Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -55,10 +55,20 @@ (if (string? help) (print help) (print "Usage: " (car (argv)) " ... ")) (exit 0)) + ;; one-of args defined +(define (any-defined? . param) + (let ((res #f)) + (for-each + (lambda (arg) + (if (get-arg arg)(set! res #t))) + param) + res)) + +;; args: (define (get-args args params switches arg-hash num-needed) (let* ((numtargs (length args)) (adj-num-needed (if num-needed (+ num-needed 2) #f))) (if (< numtargs (if adj-num-needed adj-num-needed 2)) (if (>= num-needed 1) ADDED mtserve.scm Index: mtserve.scm ================================================================== --- /dev/null +++ mtserve.scm @@ -0,0 +1,2909 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +;; (declare (uses dbi)) +(declare (uses pkts)) +;; (declare (uses stml2)) +;; (declare (uses cookie)) +;; (declare (uses csv-xml)) +;; (declare (uses hostinfo)) + +(declare (uses adjutant)) +;; (declare (uses archivemod)) +(declare (uses apimod)) +;; (declare (uses autoload)) +;; (declare (uses bigmod)) +(declare (uses commonmod)) +(declare (uses configfmod)) +(declare (uses dbmod)) +(declare (uses debugprint)) +;; (declare (uses ducttape-lib)) +;; (declare (uses ezstepsmod)) +(declare (uses launchmod)) +(declare (uses mtargs)) +(declare (uses mtver)) +;; (declare (uses mutils)) +(declare (uses processmod)) +(declare (uses rmtmod)) +;; (declare (uses runsmod)) +;; (declare (uses servermod)) +;; (declare (uses testsmod)) +(declare (uses dbmgrmod)) + +;; needed for configf scripts, scheme etc. +;; (declare (uses apimod.import)) +;; (declare (uses debugprint.import)) +;; (declare (uses mtargs.import)) +;; (declare (uses commonmod.import)) +;; (declare (uses configfmod.import)) +;; (declare (uses bigmod.import)) +;; (declare (uses dbmod.import)) +;; (declare (uses rmtmod.import)) +;; (declare (uses servermod.import)) +;; (declare (uses launchmod.import)) + +;; (include "call-with-environment-variables/call-with-environment-variables.scm") + +(module mtserve + * + + (import scheme + + chicken.base +;; chicken.bitwise +;; chicken.condition +;; ;; chicken.csi +;; chicken.eval +;; chicken.file +;; chicken.file.posix +;; chicken.format +;; chicken.io +;; chicken.irregex +;; chicken.pathname +;; chicken.port +;; chicken.pretty-print +;; chicken.process + chicken.process-context +;; chicken.process-context.posix +;; chicken.process.signal +;; chicken.random +;; chicken.repl +;; chicken.sort + chicken.string +;; chicken.tcp +;; chicken.time +;; chicken.time.posix +;; +;; (prefix base64 base64:) +;; (prefix sqlite3 sqlite3:) +;; (prefix sxml-modifications sxml-) +;; address-info +;; csv-abnf +;; directory-utils +;; fmt +;; format +;; http-client +;; intarweb +;; json +;; linenoise +;; matchable +;; md5 +;; message-digest +;; queues +;; regex +;; regex-case +;; s11n +;; sparse-vectors +;; spiffy +;; spiffy-directory-listing +;; spiffy-request-vars +;; sql-de-lite +;; stack +;; sxml-modifications +;; sxml-serializer +;; sxml-transforms +;; system-information +;; typed-records +;; uri-common +;; z3 +;; +;; srfi-1 +;; srfi-4 + srfi-18 +;; srfi-13 +;; srfi-98 +;; srfi-69 +;; +;; ;; local modules +;; autoload +;; adjutant +;; csv-xml +;; ;; hostinfo +;; mtver +;; mutils +;; cookie +;; csv-xml +;; ducttape-lib + (prefix mtargs args:) +;; pkts +;; stml2 +;; (prefix dbi dbi:) +;; +;; apimod +;; archivemod +;; bigmod + commonmod +;; configfmod +;; dbmod + debugprint +;; ezstepsmod + launchmod +;; processmod +;; rmtmod +;; runsmod +;; servermod +;; tasksmod +;; testsmod + dbmgrmod +;; +;; ulex + ) + +;; ;; ulex parameters +;; (work-method 'direct) +;; (return-method 'direct) + + ;; ulex parameters + ;; (work-method 'mailbox) + ;; (return-method 'mailbox) + +;; (my-with-lock common:with-simple-file-lock) +;; +;; ;; fake out readline usage of toplevel-command +;; (define (toplevel-command . a) #f) +;; (define *didsomething* #f) +;; (define *db* #f) ;; this is only for the repl, do not use in general!!!! +;; +;; ;; (include "common_records.scm") +;; ;; (include "key_records.scm") +;; ;; (include "db_records.scm") +;; (include "run_records.scm") +;; ;; (include "test_records.scm") +;; +;; ;; (include "common.scm") +;; (include "db.scm") +;; ;; (include "server.scm") +;; (include "tests.scm") +;; (include "genexample.scm") +;; (include "tdb.scm") +;; (include "env.scm") +;; (include "diff-report.scm") +;; (include "ods.scm") +;; + + ;; process args + (define remargs (args:get-args + (argv) + (list ;; "-runtests" ;; run a specific test + ;; "-config" ;; override the config file name + ;; "-append-config" + ;; "-execute" ;; run the command encoded in the base64 parameter + ;; "-step" + ;; "-target" + ;; "-reqtarg" + ;; ":runname" + ;; "-runname" + ;; ":state" + ;; "-state" + ;; ":status" + ;; "-status" + ;; "-list-runs" + ;; "-testdata-csv" + ;; "-testpatt" + ;; "--modepatt" + ;; "-modepatt" + ;; "-tagexpr" + ;; "-itempatt" + ;; "-setlog" + ;; "-set-toplog" + ;; "-runstep" + ;; "-logpro" + ;; "-m" + ;; "-rerun" + ;; + ;; "-days" + ;; "-rename-run" + ;; "-to" + ;; "-dest" + ;; "-source" + ;; "-time-stamp" + ;; ;; values and messages + ;; ":category" + ;; ":variable" + ;; ":value" + ;; ":expected" + ;; ":tol" + ;; ":units" + ;; + ;; ;; misc + ;; "-start-dir" + ;; "-run-patt" + ;; "-target-patt" + ;; "-contour" + ;; "-area-tag" + ;; "-area" + ;; "-run-tag" + "-server" + "-db" ;; file name for setting up a server + ;; "-adjutant" + ;; "-transport" + ;; "-port" + ;; "-extract-ods" + ;; "-pathmod" + ;; "-env2file" + ;; "-envcap" + ;; "-envdelta" + ;; "-setvars" + ;; "-set-state-status" + ;; + ;; ;; move runs stuff here + ;; "-remove-keep" + ;; "-set-run-status" + ;; "-age" + ;; + ;; ;; archive + ;; "-archive" + ;; "-actions" + ;; "-precmd" + ;; "-include" + ;; "-exclude-rx" + ;; "-exclude-rx-from" + ;; + "-debug" ;; for *verbosity* > 2 + ;; "-debug-noprop" + ;; "-create-test" + ;; "-override-timeout" + ;; "-test-files" ;; -test-paths is for listing all + ;; "-load" ;; load and exectute a scheme file + ;; "-section" + ;; "-var" + ;; "-dumpmode" + ;; "-run-id" + ;; "-ping" + ;; "-refdb2dat" + ;; "-o" + ;; "-log" + ;; "-autolog" + ;; "-sync-log" + ;; "-since" + ;; "-fields" + ;; "-recover-test" ;; run-id,test-id - used internally to recover a test stuck in RUNNING state + ;; "-sort" + ;; "-target-db" + ;; "-source-db" + ;; "-prefix-target" + ;; + ;; "-src-target" + ;; "-src-runname" + ;; "-diff-email" + ;; "-sync-to" + ;; "-pgsync" + ;; "-kill-wait" ;; wait this long before removing test (default is 10 sec) + ;; "-diff-html" + ;; + ;; ;; wizards, area capture, setup new ... + ;; "-extract-skeleton" + ) + (list "-h" "-help" "--help" + ;; "-manual" + "-version" + ;; "-force" + ;; "-xterm" + ;; "-showkeys" + ;; "-show-keys" + ;; "-test-status" + ;; "-set-values" + ;; "-load-test-data" + ;; "-summarize-items" + ;; "-gui" + ;; "-daemonize" + ;; "-preclean" + ;; "-rerun-clean" + ;; "-rerun-all" + ;; "-clean-cache" + ;; "-no-cache" + ;; "-cache-db" + ;; "-cp-eventtime-to-publishtime" + ;; "-use-db-cache" + ;; "-prepend-contour" + ;; + ;; + ;; ;; misc + ;; "-repl" + ;; "-lock" + ;; "-unlock" + ;; "-list-servers" + ;; "-kill-servers" + ;; "-run-wait" ;; wait on a run to complete (i.e. no RUNNING) + ;; "-one-pass" ;; + ;; "-local" ;; run some commands using local db access + ;; "-generate-html" + ;; "-generate-html-structure" + ;; "-list-run-time" + ;; "-list-test-time" + ;; + ;; ;; misc queries + ;; "-list-disks" + ;; "-list-targets" + ;; "-list-db-targets" + ;; "-show-runconfig" + ;; "-show-config" + ;; "-show-cmdinfo" + ;; "-get-run-status" + ;; "-list-waivers" + ;; + ;; ;; queries + ;; "-test-paths" ;; get path(s) to a test, ordered by youngest first + ;; + ;; "-runall" ;; run all tests, respects -testpatt, defaults to % + ;; "-run" ;; alias for -runall + ;; "-remove-runs" + ;; "-kill-runs" + ;; "-kill-rerun" + ;; "-keep-records" ;; use with -remove-runs to remove only the run data + ;; "-rebuild-db" + ;; "-cleanup-db" + ;; "-rollup" + ;; "-update-meta" + ;; "-create-megatest-area" + ;; "-mark-incompletes" + ;; + ;; "-convert-to-norm" + ;; "-convert-to-old" + ;; "-import-megatest.db" + ;; "-sync-to-megatest.db" + ;; "-sync-brute-force" + ;; "-logging" + ;; "-v" ;; verbose 2, more than normal (normal is 1) + ;; "-q" ;; quiet 0, errors/warnings only + ;; + ;; "-diff-rep" + ;; + ;; "-syscheck" + ;; "-obfuscate" + ;; junk placeholder + ;; "-:p" + + ) + args:arg-hash + 0)) + + ;; Add args that use remargs here + ;; + (if (not (null? remargs)) + (debug:print-error 0 *default-log-port* "Unrecognised arguments: " (string-intersperse (if (list? remargs) remargs (argv)) " "))) + + ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file + ;; + (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.mtserverc"))) + (if (common:file-exists? debugcontrolf) + (load debugcontrolf))) + + ;; before doing anything else change to the start-dir if provided + ;; + (if (args:get-arg "-start-dir") + (if (common:file-exists? (args:get-arg "-start-dir")) + (let ((fullpath (common:real-path (args:get-arg "-start-dir")))) + (set-environment-variable! "PWD" fullpath) + (change-directory fullpath)) + (begin + (debug:print-error 0 *default-log-port* "non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.") + (exit 1)))) + + + (define (main) + (debug:setup) + (make-and-init-bigdata) + (let ((tl (launch:setup)) + (dbname (args:get-arg "-db"))) + (if dbname + (rmt:server-launch dbname) + (debug:print 0 *default-log-port* "Usage: mtserve -db .db")))) + #;(set! *didsomething* #t) + + + (thread-join! + (thread-start! + (make-thread main))) + +) + +;; (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file +;; (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file +;; +;; ;;====================================================================== +;; ;; Test commands (i.e. for use inside tests) +;; ;;====================================================================== +;; +;; (define (megatest:step step state status logfile msg) +;; (if (not (get-environment-variable "MT_CMDINFO")) +;; (begin +;; (debug:print-error 0 *default-log-port* "MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!") +;; (exit 5)) +;; (let* ((cmdinfo (common:read-encoded-string (get-environment-variable "MT_CMDINFO"))) +;; (transport (assoc/default 'transport cmdinfo)) +;; (testpath (assoc/default 'testpath cmdinfo)) +;; (test-name (assoc/default 'test-name cmdinfo)) +;; (runscript (assoc/default 'runscript cmdinfo)) +;; (db-host (assoc/default 'db-host cmdinfo)) +;; (run-id (assoc/default 'run-id cmdinfo)) +;; (test-id (assoc/default 'test-id cmdinfo)) +;; (itemdat (assoc/default 'itemdat cmdinfo)) +;; (work-area (assoc/default 'work-area cmdinfo)) +;; (db #f)) +;; (change-directory testpath) +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; (if (and state status) +;; (let ((comment (launch:load-logpro-dat run-id test-id step))) +;; ;; (rmt:test-set-log! run-id test-id (conc stepname ".html")))) +;; (rmt:teststep-set-status! run-id test-id step state status (or comment msg) logfile)) +;; (begin +;; (debug:print-error 0 *default-log-port* "You must specify :state and :status with every call to -step") +;; (exit 6)))))) +;; +;; ;;====================================================================== +;; ;; full run +;; ;;====================================================================== +;; +;; (define (handle-run-requests target runname keys keyvals need-clean) +;; (if (or (args:get-arg "-kill-rerun") (args:get-arg "-rerun-clean")) ;; first set states/statuses correct +;; ;; For rerun-clean do we or do we not support the testpatt? +;; (let ((states (or (configf:lookup *configdat* "validvalues" "cleanrerun-states") +;; "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED")) +;; (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses") +;; "FAIL,INCOMPLETE,ABORT,CHECK,DEAD,PREQ_FAIL,PREQ_DISCARDED"))) +;; (hash-table-set! args:arg-hash "-preclean" #t) +;; (runs:operate-on 'set-state-status +;; target +;; (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) +;; ;; "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") +;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") +;; state: states +;; ;; status: statuses +;; new-state-status: "NOT_STARTED,n/a") +;; (runs:clean-cache target runname *toppath*) +;; (runs:operate-on 'set-state-status +;; target +;; (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) +;; ;; "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") +;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") +;; ;; state: states +;; status: statuses +;; new-state-status: "NOT_STARTED,n/a"))) +;; ;; RERUN ALL +;; (if (args:get-arg "-rerun-all") ;; first set states/statuses correct +;; (let* ((rconfig (full-runconfigs-read))) +;; (hash-table-set! args:arg-hash "-preclean" #t) +;; (runs:operate-on 'set-state-status +;; target +;; (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) +;; (common:args-get-testpatt rconfig) ;; (args:get-arg "-testpatt") +;; state: #f +;; ;; status: statuses +;; new-state-status: "NOT_STARTED,n/a") +;; (runs:clean-cache target runname *toppath*) +;; (runs:operate-on 'set-state-status +;; target +;; (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) +;; (common:args-get-testpatt rconfig) ;; (args:get-arg "-testpatt") +;; ;; state: states +;; status: #f +;; new-state-status: "NOT_STARTED,n/a"))) +;; (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) +;; (if x (string->number x) #f))) +;; (rerun-cnt (if config-reruns +;; config-reruns +;; 1))) +;; +;; (runs:run-tests target +;; runname +;; #f ;; (common:args-get-testpatt #f) +;; ;; (or (args:get-arg "-testpatt") +;; ;; "%") +;; (bdat-user *bdat*) +;; args:arg-hash +;; run-count: rerun-cnt))) +;; +;; ;; csv processing record +;; (define (make-refdb:csv) +;; (vector +;; (make-sparse-array) +;; (make-hash-table) +;; (make-hash-table) +;; 0 +;; 0)) +;; (define-inline (refdb:csv-get-svec vec) (vector-ref vec 0)) +;; (define-inline (refdb:csv-get-rows vec) (vector-ref vec 1)) +;; (define-inline (refdb:csv-get-cols vec) (vector-ref vec 2)) +;; (define-inline (refdb:csv-get-maxrow vec) (vector-ref vec 3)) +;; (define-inline (refdb:csv-get-maxcol vec) (vector-ref vec 4)) +;; (define-inline (refdb:csv-set-svec! vec val)(vector-set! vec 0 val)) +;; (define-inline (refdb:csv-set-rows! vec val)(vector-set! vec 1 val)) +;; (define-inline (refdb:csv-set-cols! vec val)(vector-set! vec 2 val)) +;; (define-inline (refdb:csv-set-maxrow! vec val)(vector-set! vec 3 val)) +;; (define-inline (refdb:csv-set-maxcol! vec val)(vector-set! vec 4 val)) +;; +;; (define (get-dat results sheetname) +;; (or (hash-table-ref/default results sheetname #f) +;; (let ((tmp-vec (make-refdb:csv))) +;; (hash-table-set! results sheetname tmp-vec) +;; tmp-vec))) +;; +;; ;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions +;; (define (open-logfile logpath-in) +;; (condition-case +;; (let* ((log-dir (or (pathname-directory logpath-in) ".")) +;; (fname (pathname-strip-directory logpath-in)) +;; (logpath (if (> (string-length fname) 250) +;; (let ((newlogf (conc log-dir "/" (common:get-signature fname) ".log"))) +;; (debug:print 0 *default-log-port* "WARNING: log file " logpath-in " path too long, converted to " newlogf) +;; newlogf) +;; logpath-in))) +;; (if (not (directory-exists? log-dir)) +;; (system (conc "mkdir -p " log-dir))) +;; (open-output-file logpath)) +;; (exn () +;; (debug:print-error 0 *default-log-port* "Could not open log file for write: "logpath-in) +;; (set! *didsomething* #t) +;; (exit 1)))) +;; +;; ;; Disabled help items +;; ;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s) +;; ;; from prior runs with same keys +;; ;; -daemonize : fork into background and disconnect from stdin/out +;; +;; (define help (conc " +;; Megatest, documentation at http://www.kiatoa.com/fossils/megatest +;; version " megatest-version " +;; license GPL, Copyright Matt Welland 2006-2017 +;; +;; Usage: megatest [options] +;; -h : this help +;; -manual : show the Megatest user manual +;; -version : print megatest version (currently " megatest-version ") +;; +;; Launching and managing runs +;; -run : run all tests or as specified by -testpatt +;; -remove-runs : remove the data for a run, requires -runname and -testpatt +;; Optionally use :state and :status, use -keep-records to remove only +;; the run data. Use -kill-wait to override the 10 second +;; per test wait after kill delay (e.g. -kill-wait 0). +;; -kill-runs : kill existing run(s) (all incomplete tests killed) +;; -kill-rerun : kill an existing run (all incomplete tests killed and run is rerun) +;; -set-state-status X,Y : set state to X and status to Y, requires controls per -remove-runs +;; -rerun FAIL,WARN... : force re-run for tests with specificed status(s) +;; -rerun-clean : set all tests not COMPLETED+PASS,WARN,WAIVED to NOT_STARTED,n/a +;; and then run the specified testpatt with -preclean +;; -rerun-all : set all tests to NOT_STARTED,n/a and run with -preclean +;; -lock : lock run specified by target and runname +;; -unlock : unlock run specified by target and runname +;; -set-run-status status : sets status for run to status, requires -target and -runname +;; -get-run-status : gets status for run specified by target and runname +;; -run-wait : wait on run specified by target and runname +;; -preclean : remove the existing test directory before running the test +;; -clean-cache : remove the cached megatest.config and runconfigs.config files +;; -no-cache : do not use the cached config files. +;; -one-pass : launch as many tests as you can but do not wait for more to be ready +;; -remove-keep N : remove all but N most recent runs per target; use '-actions, -age, -precmd' +;; -age : 120d,3h,20m to apply only to runs older than the +;; specified age. NB// M=month, m=minute +;; -actions [,...] : actions to take; print,remove-runs,archive,kill-runs +;; -precmd : insert a wrapper command in front of the commands run +;; +;; Selectors (e.g. use for -runtests, -remove-runs, -set-state-status, -list-runs etc.) +;; -target key1/key2/... : run for key1, key2, etc. +;; -reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfigs +;; -testpatt patt1/patt2,patt3/... : % is wildcard +;; -runname : required, name for this particular test run +;; -state : Applies to runs, tests or steps depending on context +;; -status : Applies to runs, tests or steps depending on context +;; -modepatt key : load testpatt from in runconfigs instead of default TESTPATT if -testpatt and -tagexpr are not specified +;; -tagexpr tag1,tag2%,.. : select tests with tags matching expression +;; +;; +;; Test helpers (for use inside tests) +;; -step stepname +;; -test-status : set the state and status of a test (use :state and :status) +;; -setlog logfname : set the path/filename to the final log relative to the test +;; directory. may be used with -test-status +;; -set-toplog logfname : set the overall log for a suite of sub-tests +;; -summarize-items : for an itemized test create a summary html +;; -m comment : insert a comment for this test +;; +;; Test data capture +;; -set-values : update or set values in the testdata table +;; :category : set the category field (optional) +;; :variable : set the variable name (optional) +;; :value : value measured (required) +;; :expected : value expected (required) +;; :tol : |value-expect| <= tol (required, can be <, >, >=, <= or number) +;; :units : name of the units for value, expected_value etc. (optional) +;; -load-test-data : read test specific data for storage in the test_data table +;; from standard in. Each line is comma delimited with four +;; fields category,variable,value,comment +;; +;; Queries +;; -list-runs patt : list runs matching pattern \"patt\", % is the wildcard +;; -show-keys : show the keys used in this megatest setup +;; -test-files targpatt : get the most recent test path/file matching targpatt e.g. %/% or '*.log' +;; returns list sorted by age ascending, see examples below +;; -test-paths : get the test paths matching target, runname, item and test +;; patterns. +;; -list-disks : list the disks available for storing runs +;; -list-targets : list the targets in runconfigs.config +;; -list-db-targets : list the target combinations used in the db +;; -show-config : dump the internal representation of the megatest.config file +;; -show-runconfig : dump the internal representation of the runconfigs.config file +;; -dumpmode MODE : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line) +;; -show-cmdinfo : dump the command info for a test (run in test environment) +;; -section sectionName +;; -var varName : for config and runconfig lookup value for sectionName varName +;; -since N : get list of runs changed since time N (Unix seconds) +;; -fields fieldspec : fields to include in json dump; runs:id,runame+tests:testname+steps +;; -sort fieldname : in -list-runs sort tests by this field +;; -testdata-csv [categorypatt/]varpatt : dump testdata for given category +;; +;; Misc +;; -start-dir path : switch to this directory before running megatest +;; -contour cname : add a level of hierarcy to the linktree and run paths +;; -area-tag tagname : add a tag to an area while syncing to pgdb +;; -run-tag tagname : add a tag to a run while syncing to pgdb +;; -rebuild-db : bring the database schema up to date +;; -cleanup-db : remove any orphan records, vacuum the db +;; -import-megatest.db : push data from megatest.db to cache db files in /tmp/$USER +;; -sync-to-megatest.db : pull data from cache files in /tmp/$USER to megatest.db +;; -sync-to dest : sync to new postgresql central style database +;; -update-meta : update the tests metadata for all tests +;; -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are +;; overwritten by values set in config files. +;; -server -|hostname : start the server (reduces contention on megatest.db), use +;; - to automatically figure out hostname +;; -adjutant C,M : start the server/adjutant with allocated cores C and Mem M (Gig), +;; use 0,0 to auto use full machine +;; -transport http|rpc : use http or rpc for transport (default is http) +;; -log logfile : send stdout and stderr to logfile +;; -autolog logfilebase : appends pid and host to logfilebase for logfile +;; -list-servers : list the servers +;; -kill-servers : kill all servers +;; -repl : start a repl (useful for extending megatest) +;; -load file.scm : load and run file.scm +;; -mark-incompletes : find and mark incomplete tests +;; -ping run-id|host:port : ping server, exit with 0 if found +;; -debug N|N,M,O... : enable debug 0-N or N and M and O ... +;; -debug-noprop N|M,M,O...: enable debug but do not propagate to subprocesses via MT_DEBUG +;; -config fname : override the megatest.config file with fname +;; -append-config fname : append fname to the megatest.config file +;; +;; Utilities +;; -env2file fname : write the environment to fname.csh and fname.sh +;; -envcap a : save current variables labeled as context 'a' in file envdat.db +;; -envdelta a-b : output enviroment delta from context a to context b to -o fname +;; set the output mode with -dumpmode csh, bash or ini +;; note: ini format will use calls to use curr and minimize path +;; -refdb2dat refdb : convert refdb to sexp or to format specified by s-dumpmode +;; formats: perl, ruby, sqlite3, csv (for csv the -o param +;; will substitute %s for the sheet name in generating +;; multiple sheets) +;; -o : output file for refdb2dat (defaults to stdout) +;; -archive cmd : archive runs specified by selectors to one of disks specified +;; in the [archive-disks] section. +;; cmd: keep-html, restore, save, save-remove, get, replicate-db (use +;; -dest to set destination), -include path1,path2... to get or save specific files +;; -generate-html : create a simple html dashboard for browsing your runs +;; -generate-html-structure : create a top level html veiw to list targets/runs and a Run view within each run directory. +;; -list-run-time : list time requered to complete runs. It supports following switches +;; -run-patt -target-patt -dumpmode +;; -list-test-time : list time requered to complete each test in a run. It following following arguments +;; -runname -target -dumpmode +;; -syscheck : do some very basic checks; write access and space in tmp, home, runs, links and +;; is $DISPLAY valid +;; -list-waivers : dump waivers for specified target, runname, testpatt to stdout +;; +;; Diff report +;; -diff-rep : generate diff report (must include -src-target, -src-runname, -target, -runname +;; and either -diff-email or -diff-html) +;; -src-target +;; -src-runname +;; -diff-email : comma separated list of email addresses to send diff report +;; -diff-html : path to html file to generate +;; +;; Spreadsheet generation +;; -extract-ods fname.ods : extract an open document spreadsheet from the database +;; -pathmod path : insert path, i.e. path/runame/itempath/logfile.html +;; will clear the field if no rundir/testname/itempath/logfile +;; if it contains forward slashes the path will be converted +;; to windows style +;; Getting started +;; -create-megatest-area : create a skeleton megatest area. You will be prompted for paths +;; -create-test testname : create a skeleton megatest test. You will be prompted for info +;; +;; Examples +;; +;; # Get test path, use '.' to get a single path or a specific path/file pattern +;; megatest -test-files 'logs/*.log' -target ubuntu/n%/no% -runname w49% -testpatt test_mt% +;; +;; Called as " (string-intersperse (argv) " ") " +;; Version " megatest-version ", built from " megatest-fossil-hash )) +;; +;; (define (main) +;; (make-and-init-bigdata) +;; +;; ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file +;; ;; +;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) +;; (if (common:file-exists? debugcontrolf) +;; (load debugcontrolf))) +;; +;; ;; usage logging, careful with this, it is not designed to deal with all real world challenges! +;; ;; +;; (if (and *usage-log-file* +;; (file-writable? *usage-log-file*)) +;; (with-output-to-file +;; *usage-log-file* +;; (lambda () +;; (print +;; (if *usage-use-seconds* +;; (current-seconds) +;; (time->string +;; (seconds->local-time (current-seconds)) +;; "%Yww%V.%w %H:%M:%S")) +;; " " +;; (current-user-name) " " +;; (current-directory) " " +;; "\"" (string-intersperse (argv) " ") "\"")) +;; #:append)) +;; +;; ;; -gui : start a gui interface +;; ;; -config fname : override the runconfigs file with fname +;; +;; ;; process args +;; (define remargs (args:get-args +;; (argv) +;; (list "-runtests" ;; run a specific test +;; "-config" ;; override the config file name +;; "-append-config" +;; "-execute" ;; run the command encoded in the base64 parameter +;; "-step" +;; "-target" +;; "-reqtarg" +;; ":runname" +;; "-runname" +;; ":state" +;; "-state" +;; ":status" +;; "-status" +;; "-list-runs" +;; "-testdata-csv" +;; "-testpatt" +;; "--modepatt" +;; "-modepatt" +;; "-tagexpr" +;; "-itempatt" +;; "-setlog" +;; "-set-toplog" +;; "-runstep" +;; "-logpro" +;; "-m" +;; "-rerun" +;; +;; "-days" +;; "-rename-run" +;; "-to" +;; "-dest" +;; "-source" +;; "-time-stamp" +;; ;; values and messages +;; ":category" +;; ":variable" +;; ":value" +;; ":expected" +;; ":tol" +;; ":units" +;; +;; ;; misc +;; "-start-dir" +;; "-run-patt" +;; "-target-patt" +;; "-contour" +;; "-area-tag" +;; "-area" +;; "-run-tag" +;; "-server" +;; "-db" ;; file name for setting up a server +;; "-adjutant" +;; "-transport" +;; "-port" +;; "-extract-ods" +;; "-pathmod" +;; "-env2file" +;; "-envcap" +;; "-envdelta" +;; "-setvars" +;; "-set-state-status" +;; +;; ;; move runs stuff here +;; "-remove-keep" +;; "-set-run-status" +;; "-age" +;; +;; ;; archive +;; "-archive" +;; "-actions" +;; "-precmd" +;; "-include" +;; "-exclude-rx" +;; "-exclude-rx-from" +;; +;; "-debug" ;; for *verbosity* > 2 +;; "-debug-noprop" +;; "-create-test" +;; "-override-timeout" +;; "-test-files" ;; -test-paths is for listing all +;; "-load" ;; load and exectute a scheme file +;; "-section" +;; "-var" +;; "-dumpmode" +;; "-run-id" +;; "-ping" +;; "-refdb2dat" +;; "-o" +;; "-log" +;; "-autolog" +;; "-sync-log" +;; "-since" +;; "-fields" +;; "-recover-test" ;; run-id,test-id - used internally to recover a test stuck in RUNNING state +;; "-sort" +;; "-target-db" +;; "-source-db" +;; "-prefix-target" +;; +;; "-src-target" +;; "-src-runname" +;; "-diff-email" +;; "-sync-to" +;; "-pgsync" +;; "-kill-wait" ;; wait this long before removing test (default is 10 sec) +;; "-diff-html" +;; +;; ;; wizards, area capture, setup new ... +;; "-extract-skeleton" +;; ) +;; (list "-h" "-help" "--help" +;; "-manual" +;; "-version" +;; "-force" +;; "-xterm" +;; "-showkeys" +;; "-show-keys" +;; "-test-status" +;; "-set-values" +;; "-load-test-data" +;; "-summarize-items" +;; "-gui" +;; "-daemonize" +;; "-preclean" +;; "-rerun-clean" +;; "-rerun-all" +;; "-clean-cache" +;; "-no-cache" +;; "-cache-db" +;; "-cp-eventtime-to-publishtime" +;; "-use-db-cache" +;; "-prepend-contour" +;; +;; +;; ;; misc +;; "-repl" +;; "-lock" +;; "-unlock" +;; "-list-servers" +;; "-kill-servers" +;; "-run-wait" ;; wait on a run to complete (i.e. no RUNNING) +;; "-one-pass" ;; +;; "-local" ;; run some commands using local db access +;; "-generate-html" +;; "-generate-html-structure" +;; "-list-run-time" +;; "-list-test-time" +;; +;; ;; misc queries +;; "-list-disks" +;; "-list-targets" +;; "-list-db-targets" +;; "-show-runconfig" +;; "-show-config" +;; "-show-cmdinfo" +;; "-get-run-status" +;; "-list-waivers" +;; +;; ;; queries +;; "-test-paths" ;; get path(s) to a test, ordered by youngest first +;; +;; "-runall" ;; run all tests, respects -testpatt, defaults to % +;; "-run" ;; alias for -runall +;; "-remove-runs" +;; "-kill-runs" +;; "-kill-rerun" +;; "-keep-records" ;; use with -remove-runs to remove only the run data +;; "-rebuild-db" +;; "-cleanup-db" +;; "-rollup" +;; "-update-meta" +;; "-create-megatest-area" +;; "-mark-incompletes" +;; +;; "-convert-to-norm" +;; "-convert-to-old" +;; "-import-megatest.db" +;; "-sync-to-megatest.db" +;; "-sync-brute-force" +;; "-logging" +;; "-v" ;; verbose 2, more than normal (normal is 1) +;; "-q" ;; quiet 0, errors/warnings only +;; +;; "-diff-rep" +;; +;; "-syscheck" +;; "-obfuscate" +;; ;; junk placeholder +;; ;; "-:p" +;; +;; ) +;; args:arg-hash +;; 0)) +;; +;; ;; Add args that use remargs here +;; ;; +;; (if (and (not (null? remargs)) +;; (not (or +;; (args:get-arg "-runstep") +;; (args:get-arg "-envcap") +;; (args:get-arg "-envdelta") +;; ) +;; )) +;; (debug:print-error 0 *default-log-port* "Unrecognised arguments: " (string-intersperse (if (list? remargs) remargs (argv)) " "))) +;; +;; ;; before doing anything else change to the start-dir if provided +;; ;; +;; (if (args:get-arg "-start-dir") +;; (if (common:file-exists? (args:get-arg "-start-dir")) +;; (let ((fullpath (common:real-path (args:get-arg "-start-dir")))) +;; (set-environment-variable! "PWD" fullpath) +;; (change-directory fullpath)) +;; (begin +;; (debug:print-error 0 *default-log-port* "non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.") +;; (exit 1)))) +;; +;; ;; immediately set MT_TARGET if -reqtarg or -target are available +;; ;; +;; (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) +;; (if targ (set-environment-variable! "MT_TARGET" targ))) +;; +;; ;; The watchdog is to keep an eye on things like db sync etc. +;; ;; +;; ;; (init-watchdog) +;; +;; ;; (define (debug:debug-mode n) +;; ;; (cond +;; ;; ((and (number? *verbosity*) ;; number number +;; ;; (number? n)) +;; ;; (<= n *verbosity*)) +;; ;; ((and (list? *verbosity*) ;; list number +;; ;; (number? n)) +;; ;; (member n *verbosity*)) +;; ;; ((and (list? *verbosity*) ;; list list +;; ;; (list? n)) +;; ;; (not (null? (lset-intersection! eq? *verbosity* n)))) +;; ;; ((and (number? *verbosity*) +;; ;; (list? n)) +;; ;; (member *verbosity* n)))) +;; +;; ;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not +;; ;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation +;; ;; where (launch:setup) returns #f? +;; ;; +;; (if (or (args:get-arg "-log") ;;(args:get-arg "-server") ;; redirect the log always when a server +;; (args:get-arg "-autolog")) +;; (handle-exceptions +;; exn +;; (begin +;; (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) +;; ) +;; (let* ((tl (or (args:get-arg "-log") +;; (args:get-arg "-autolog") ;; autolog provides the basename .../logs/something- for the logfile +;; (launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified +;; (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name +;; (conc tl (current-process-id)"-"(get-host-name)".log") +;; (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) +;; (oup (open-logfile logf))) +;; (if (not (args:get-arg "-log")) +;; (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log +;; (debug:print-info 0 *default-log-port* "Sending log output to " logf) +;; (set! *default-log-port* oup)))) +;; +;; (if (or (args:get-arg "-h") +;; (args:get-arg "-help") +;; (args:get-arg "--help")) +;; (begin +;; (print help) +;; (exit))) +;; +;; (if (args:get-arg "-manual") +;; (let* ((htmlviewercmd (or (configf:lookup *configdat* "setup" "htmlviewercmd") +;; (common:which '("firefox" "arora")))) +;; (install-home (common:get-install-area)) +;; (manual-html (conc install-home "/share/docs/megatest_manual.html"))) +;; (if (and install-home +;; (common:file-exists? manual-html)) +;; (system (conc "(" htmlviewercmd " " manual-html " ) &")) +;; (system (conc "(" htmlviewercmd " http://www.kiatoa.com/cgi-bin/fossils/megatest/doc/tip/docs/manual/megatest_manual.html ) &"))) +;; (exit))) +;; +;; (if (args:get-arg "-version") +;; (begin +;; (print (common:version-signature)) ;; (print megatest-version) +;; (exit))) +;; +;; ;; Overall exit handling setup immediately +;; ;; +;; (if (or (args:get-arg "-process-reap")) +;; ;; (args:get-arg "-runtests") +;; ;; (args:get-arg "-execute") +;; ;; (args:get-arg "-remove-runs") +;; ;; (args:get-arg "-runstep")) +;; (let ((original-exit (exit-handler))) +;; (exit-handler (lambda (#!optional (exit-code 0)) +;; (printf "Preparing to exit with exit code ~A ...\n" exit-code) +;; (for-each +;; +;; (lambda (pid) +;; (handle-exceptions +;; exn +;; (begin +;; (printf "process reap failed. exn=~A\n" exn) +;; #t) +;; (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) +;; (if (or (eq? pid-val pid) +;; (eq? pid-val 0)) +;; (begin +;; (printf "Sending signal/term to ~A\n" pid) +;; (process-signal pid signal/term)))))) +;; (process:children #f)) +;; (original-exit exit-code))))) +;; +;; ;; for some switches always print the command to stderr +;; ;; +;; (if (args:any-defined? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") +;; (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) +;; +;; ;; some switches imply homehost. Exit here if not on homehost +;; ;; +;; #;(let ((homehost-required (list "-cleanup-db" "-server"))) +;; (if (apply args:any-defined? homehost-required) +;; (if (not (common:on-homehost?)) +;; (for-each +;; (lambda (switch) +;; (if (args:get-arg switch) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch +;; ", you can move homehost by removing the .homehost file but this will disrupt any runs in progress.") +;; (exit 1)))) +;; homehost-required)))) +;; +;; ;;====================================================================== +;; ;; Misc setup stuff +;; ;;====================================================================== +;; +;; (debug:setup) +;; +;; (if (args:get-arg "-logging")(set! *logging* #t)) +;; +;; ;;(if (debug:debug-mode 3) ;; we are obviously debugging +;; ;; (set! open-run-close open-run-close-no-exception-handling)) +;; +;; (if (args:get-arg "-itempatt") +;; (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt")))) +;; (debug:print 0 *default-log-port* "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval) +;; (hash-table-set! args:arg-hash "-testpatt" newval) +;; (hash-table-delete! args:arg-hash "-itempatt"))) +;; +;; (if (args:get-arg "-runtests") +;; (debug:print 0 *default-log-port* "WARNING: \"-runtests\" is deprecated. Use \"-run\" with \"-testpatt\" instead")) +;; +;; ;; (debug:print 0 *default-log-port* "on-exit disabled. Please re-enable") +;; (on-exit std-exit-procedure) +;; +;; ;;====================================================================== +;; ;; Misc general calls +;; ;;====================================================================== +;; +;; ;; TODO: Restore this functionality +;; +;; #; (if (and (args:get-arg "-cache-db") +;; (args:get-arg "-source-db")) +;; (let* ((temp-dir (or (args:get-arg "-target-db") (create-directory (conc "/tmp/" (get-environment-variable "USER") "/" (string-translate (current-directory) "/" "_"))))) +;; (target-db (conc temp-dir "/cached.db")) +;; (source-db (args:get-arg "-source-db"))) +;; (db:cache-for-read-only source-db target-db) +;; (set! *didsomething* #t))) +;; +;; ;; handle a clean-cache request as early as possible +;; ;; +;; (if (args:get-arg "-clean-cache") +;; (let ((toppath (launch:setup))) +;; (set! *didsomething* #t) ;; suppress the help output. +;; (runs:clean-cache (common:args-get-target) +;; (args:get-arg "-runname") +;; toppath))) +;; +;; (if (args:get-arg "-env2file") +;; (begin +;; (save-environment-as-files (args:get-arg "-env2file")) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-list-disks") +;; (let ((toppath (launch:setup))) +;; (print +;; (string-intersperse +;; (map (lambda (x) +;; (string-intersperse +;; x +;; " => ")) +;; (common:get-disks *configdat*)) +;; "\n")) +;; (set! *didsomething* #t))) +;; +;; +;; (if (args:get-arg "-refdb2dat") +;; (let* ((input-db (args:get-arg "-refdb2dat")) +;; (out-file (args:get-arg "-o")) +;; (out-fmt (or (args:get-arg "-dumpmode") "scheme")) +;; (out-port (if (and out-file +;; (not (member out-fmt '("sqlite3" "csv")))) +;; (open-output-file out-file) +;; (current-output-port))) +;; (res-data (configf:read-refdb input-db)) +;; (data (car res-data)) +;; (msg (cadr res-data))) +;; (if (not data) +;; (debug:print 0 *default-log-port* "Bad input? data=" data) ;; some error occurred +;; (with-output-to-port out-port +;; (lambda () +;; (case (string->symbol out-fmt) +;; ((scheme)(pp data)) +;; ((perl) +;; ;; (print "%hash = (") +;; ;; key1 => 'value1', +;; ;; key2 => 'value2', +;; ;; key3 => 'value3', +;; ;; ); +;; (configf:map-all-hier-alist +;; data +;; (lambda (sheetname sectionname varname val) +;; (print "$data{\"" sheetname "\"}{\"" sectionname "\"}{\"" varname "\"} = \"" val "\";")))) +;; ((python ruby) +;; (print "data={}") +;; (configf:map-all-hier-alist +;; data +;; (lambda (sheetname sectionname varname val) +;; (print "data[\"" sheetname "\"][\"" sectionname "\"][\"" varname "\"] = \"" val "\"")) +;; initproc1: +;; (lambda (sheetname) +;; (print "data[\"" sheetname "\"] = {}")) +;; initproc2: +;; (lambda (sheetname sectionname) +;; (print "data[\"" sheetname "\"][\"" sectionname "\"] = {}")))) +;; ((csv) +;; (let* ((results (make-hash-table)) ;; (make-sparse-array))) +;; (row-cols (make-hash-table))) ;; hash of hashes where section => ht { row- => num or col- => num +;; ;; (print "data=") +;; ;; (pp data) +;; (configf:map-all-hier-alist +;; data +;; (lambda (sheetname sectionname varname val) +;; ;; (print "sheetname: " sheetname ", sectionname: " sectionname ", varname: " varname ", val: " val) +;; (let* ((dat (get-dat results sheetname)) +;; (vec (refdb:csv-get-svec dat)) +;; (rownames (refdb:csv-get-rows dat)) +;; (colnames (refdb:csv-get-cols dat)) +;; (currrown (hash-table-ref/default rownames varname #f)) +;; (currcoln (hash-table-ref/default colnames sectionname #f)) +;; (rown (or currrown +;; (let* ((lastn (refdb:csv-get-maxrow dat)) +;; (newrown (+ lastn 1))) +;; (refdb:csv-set-maxrow! dat newrown) +;; newrown))) +;; (coln (or currcoln +;; (let* ((lastn (refdb:csv-get-maxcol dat)) +;; (newcoln (+ lastn 1))) +;; (refdb:csv-set-maxcol! dat newcoln) +;; newcoln)))) +;; (if (not (sparse-array-ref vec 0 coln)) ;; (eq? rown 0) +;; (begin +;; (sparse-array-set! vec 0 coln sectionname) +;; ;; (print "sparse-array-ref " 0 "," coln "=" (sparse-array-ref vec 0 coln)) +;; )) +;; (if (not (sparse-array-ref vec rown 0)) ;; (eq? coln 0) +;; (begin +;; (sparse-array-set! vec rown 0 varname) +;; ;; (print "sparse-array-ref " rown "," 0 "=" (sparse-array-ref vec rown 0)) +;; )) +;; (if (not currrown)(hash-table-set! rownames varname rown)) +;; (if (not currcoln)(hash-table-set! colnames sectionname coln)) +;; ;; (print "dat=" dat ", rown=" rown ", coln=" coln) +;; (sparse-array-set! vec rown coln val) +;; ;; (print "sparse-array-ref " rown "," coln "=" (sparse-array-ref vec rown coln)) +;; ))) +;; (for-each +;; (lambda (sheetname) +;; (let* ((sheetdat (get-dat results sheetname)) +;; (svec (refdb:csv-get-svec sheetdat)) +;; (maxrow (refdb:csv-get-maxrow sheetdat)) +;; (maxcol (refdb:csv-get-maxcol sheetdat)) +;; (fname (if out-file +;; (string-substitute "%s" sheetname out-file) ;; "/foo/bar/%s.csv") +;; (conc sheetname ".csv")))) +;; (with-output-to-file fname +;; (lambda () +;; ;; (print "Sheetname: " sheetname) +;; (let loop ((row 0) +;; (col 0) +;; (curr-row '()) +;; (result '())) +;; (let* ((val (sparse-array-ref svec row col)) +;; (disp-val (if val +;; (conc "\"" val "\"") +;; ""))) +;; (if (> col 0)(display ",")) +;; (display disp-val) +;; (cond +;; ((> row maxrow)(display "\n") result) +;; ((>= col maxcol) +;; (display "\n") +;; (loop (+ row 1) 0 '() (append result (list curr-row)))) +;; (else +;; (loop row (+ col 1) (append curr-row (list val)) result))))))))) +;; (hash-table-keys results)))) +;; ((sqlite3) +;; (let* ((db-file (or out-file (pathname-file input-db))) +;; (db-exists (common:file-exists? db-file)) +;; (db (sqlite3:open-database db-file))) +;; (if (not db-exists)(sqlite3:execute db "CREATE TABLE data (sheet,section,var,val);")) +;; (configf:map-all-hier-alist +;; data +;; (lambda (sheetname sectionname varname val) +;; (sqlite3:execute db +;; "INSERT OR REPLACE INTO data (sheet,section,var,val) VALUES (?,?,?,?);" +;; sheetname sectionname varname val))) +;; (sqlite3:finalize! db))) +;; (else +;; (pp data)))))) +;; (if out-file (close-output-port out-port)) +;; (exit) ;; yes, bending the rules here - need to exit since this is a utility +;; )) +;; +;; ;; disabled for now +;; +;; #;(if (args:get-arg "-ping") +;; (let* ((server-id (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":" +;; (host:port (args:get-arg "-ping"))) +;; (server-ready? (or server-id host:port) #f do-exit: #t))) +;; +;; ;;====================================================================== +;; ;; Capture, save and manipulate environments +;; ;;====================================================================== +;; +;; ;; NOTE: Keep these above the section where the server or client code is setup +;; +;; (let ((envcap (args:get-arg "-envcap"))) +;; (if envcap +;; (let* ((db (env:open-db (if (null? remargs) "envdat.db" (car remargs))))) +;; (env:save-env-vars db envcap) +;; (env:close-database db) +;; (set! *didsomething* #t)))) +;; +;; ;; delta "language" will eventually be res=a+b-c but for now it is just res=a-b +;; ;; +;; (let ((envdelta (args:get-arg "-envdelta"))) +;; (if envdelta +;; (let ((match (string-split envdelta "-")));; (string-match "([a-z0-9_]+)=([a-z0-9_\\-,]+)" envdelta))) +;; (if (not (null? match)) +;; (let* ((db (env:open-db (if (null? remargs) "envdat.db" (car remargs)))) +;; ;; (resctx (cadr match)) +;; ;; (equn (caddr match)) +;; (parts match) ;; (string-split equn "-")) +;; (minuend (car parts)) +;; (subtraend (cadr parts)) +;; (added (env:get-added db minuend subtraend)) +;; (removed (env:get-removed db minuend subtraend)) +;; (changed (env:get-changed db minuend subtraend))) +;; ;; (pp (hash-table->alist added)) +;; ;; (pp (hash-table->alist removed)) +;; ;; (pp (hash-table->alist changed)) +;; (if (args:get-arg "-o") +;; (with-output-to-file +;; (args:get-arg "-o") +;; (lambda () +;; (env:print added removed changed))) +;; (env:print added removed changed)) +;; (env:close-database db) +;; (set! *didsomething* #t)) +;; (debug:print-error 0 *default-log-port* "Parameter to -envdelta should be new=start-end"))))) +;; +;; ;;====================================================================== +;; ;; Start the server - can be done in conjunction with -runall or -runtests (one day...) +;; ;; we start the server if not running else start the client thread +;; ;;====================================================================== +;; +;; ;; Server? Start up here. +;; ;; +;; (if (args:get-arg "-server") +;; (if (not (args:get-arg "-db")) +;; (debug:print 0 *default-log-port* "ERROR: -db required to start server") +;; (let ((tl (launch:setup)) +;; (dbname (args:get-arg "-db"))) ;; transport-type (string->symbol (or (args:get-arg "-transport") "http")))) +;; (rmt:server-launch dbname) +;; (set! *didsomething* #t)))) +;; +;; ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to +;; ;; a specific Megatest area. Detail are being hashed out and this may change. +;; ;; +;; (if (args:get-arg "-adjutant") +;; (begin +;; (adjutant-run) +;; (set! *didsomething* #t))) +;; +;; (if (or (args:get-arg "-list-servers") +;; (args:get-arg "-kill-servers")) +;; (let ((tl (launch:setup))) +;; (if tl ;; all roads from here exit +;; (let* ((servers (rmt:get-servers-info *toppath*)) +;; (fmtstr "~8a~22a~20a~20a~8a\n")) +;; ;; id INTEGER PRIMARY KEY, +;; ;; host TEXT, +;; ;; port INTEGER, +;; ;; servkey TEXT, +;; ;; pid TEXT, +;; ;; ipaddr TEXT, +;; ;; apath TEXT, +;; ;; dbname TEXT, +;; ;; event_time +;; (format #t fmtstr "pid" "Interface:port" "State" "dbname" "apath") +;; (format #t fmtstr "===" "==============" "=====" "======" "=====") +;; (for-each ;; ( mod-time host port start-time pid ) +;; (lambda (server) +;; (match-let +;; (((id host port servkey pid ipaddr apath dbname event_time) server)) +;; (format #t +;; fmtstr +;; pid +;; (conc host":"port) +;; (if (server-ready? host port servkey) "Running" "Dead") +;; dbname ;; (seconds->hr-min-sec mod) +;; apath +;; ) +;; (if (args:get-arg "-kill-servers") +;; (begin +;; (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid " !!needs completion!!") +;; #;(server:kill server))))) +;; servers) +;; ;; (debug:print-info 1 *default-log-port* "Done with listservers") +;; (set! *didsomething* #t) +;; (exit)) +;; (exit)))) +;; ;; must do, would have to add checks to many/all calls below +;; +;; ;;====================================================================== +;; ;; Weird special calls that need to run *after* the server has started? +;; ;;====================================================================== +;; +;; (if (args:get-arg "-list-targets") +;; (if (launch:setup) +;; (let* ((rconfdat (configf:read-config (conc *toppath* "/runconfigs.config") #f #f)) +;; (targets (common:get-runconfig-targets rconfdat))) +;; ;; (debug:print 1 *default-log-port* "Found "(length targets) " targets") +;; (case (string->symbol (or (args:get-arg "-dumpmode") "alist")) +;; ((alist) +;; (for-each (lambda (x) +;; ;; (print "[" x "]")) +;; (print x)) +;; targets)) +;; ((json) +;; (json-write targets)) +;; (else +;; (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets"))) +;; (set! *didsomething* #t)))) +;; +;; +;; (if (args:get-arg "-show-runconfig") +;; (let ((tl (launch:setup))) +;; (push-directory *toppath*) +;; (let ((data (full-runconfigs-read))) +;; ;; keep this one local +;; (cond +;; ((and (args:get-arg "-section") +;; (args:get-arg "-var")) +;; (let ((val (or (configf:lookup data (args:get-arg "-section")(args:get-arg "-var")) +;; (configf:lookup data "default" (args:get-arg "-var"))))) +;; (if val (print val)))) +;; ((or (not (args:get-arg "-dumpmode")) +;; (string=? (args:get-arg "-dumpmode") "ini")) +;; (configf:config->ini data)) +;; ((string=? (args:get-arg "-dumpmode") "sexp") +;; (pp (hash-table->alist data))) +;; ((string=? (args:get-arg "-dumpmode") "json") +;; (json-write data)) +;; (else +;; (debug:print-error 0 *default-log-port* "-dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) +;; (set! *didsomething* #t)) +;; (pop-directory))) +;; +;; (if (args:get-arg "-show-config") +;; (let ((tl (launch:setup)) +;; (data *configdat*)) ;; (configf:read-config "megatest.config" #f #t))) +;; (push-directory *toppath*) +;; ;; keep this one local +;; (cond +;; ((and (args:get-arg "-section") +;; (args:get-arg "-var")) +;; (let ((val (configf:lookup data (args:get-arg "-section")(args:get-arg "-var")))) +;; (if val (print val)))) +;; +;; ;; print just a section if only -section +;; +;; ((equal? (args:get-arg "-dumpmode") "sexp") +;; (pp (hash-table->alist data))) +;; ((equal? (args:get-arg "-dumpmode") "json") +;; (json-write data)) +;; ((or (not (args:get-arg "-dumpmode")) +;; (string=? (args:get-arg "-dumpmode") "ini")) +;; (configf:config->ini data)) +;; (else +;; (debug:print-error 0 *default-log-port* "-dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) +;; (set! *didsomething* #t) +;; (pop-directory) +;; (bdat-time-to-exit-set! *bdat* #t))) +;; +;; (if (args:get-arg "-show-cmdinfo") +;; (if (or (args:get-arg ":value")(get-environment-variable "MT_CMDINFO")) +;; (let ((data (common:read-encoded-string (or (args:get-arg ":value")(get-environment-variable "MT_CMDINFO"))))) +;; (if (equal? (args:get-arg "-dumpmode") "json") +;; (json-write data) +;; (pp data)) +;; (set! *didsomething* #t)) +;; (debug:print-info 0 *default-log-port* "environment variable MT_CMDINFO is not set"))) +;; +;; ;;====================================================================== +;; ;; Remove old run(s) +;; ;;====================================================================== +;; +;; ;; since several actions can be specified on the command line the removal +;; ;; is done first +;; (define (operate-on action #!key (mode #f)(target-in #f)(runname-in #f)(keys-in #f)(keyvals-in #f)) ;; #f is "use default" +;; (let* ((runrec (runs:runrec-make-record)) +;; (target (or target-in (common:args-get-target))) ;; eventually get rid of the call to common:args-get-target +;; (runname (or runname-in +;; (args:get-arg "-runname"))) ;; eventually get rid of the get-arg calls +;; (testpatt (or (args:get-arg "-testpatt") +;; (and (eq? action 'archive) ;; if it is an archive command fallback to MT_TEST_NAME and MT_ITEMPATH +;; (common:get-full-test-name)) +;; (and (eq? action 'kill-runs) +;; "%/%") ;; I'm just guessing that this is correct :( +;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt"))) +;; ))) ;; +;; (cond +;; ((not target) +;; (debug:print-error 0 *default-log-port* "Missing required parameter for " +;; action ", you must specify -target or -reqtarg") +;; (exit 1)) +;; ((not runname) +;; (debug:print-error 0 *default-log-port* "Missing required parameter for " +;; action ", you must specify the run name pattern with -runname patt") +;; (exit 2)) +;; ((not testpatt) +;; (debug:print-error 0 *default-log-port* "Missing required parameter for " +;; action ", you must specify the test pattern with -testpatt") +;; (exit 3)) +;; (else +;; (if (not (car *configinfo*)) +;; (begin +;; (debug:print-error 0 *default-log-port* "Attempted " action "on test(s) but run area config file not found") +;; (exit 1)) +;; ;; put test parameters into convenient variables +;; (begin +;; ;; check for correct version, exit with message if not correct +;; +;; ;; TODO: restore this functionality +;; +;; ;; (common:exit-on-version-changed) +;; +;; (runs:operate-on action +;; target +;; runname +;; testpatt +;; state: (common:args-get-state) +;; status: (common:args-get-status) +;; new-state-status: (args:get-arg "-set-state-status") +;; mode: mode))) +;; (set! *didsomething* #t))))) +;; +;; (if (args:get-arg "-kill-runs") +;; (general-run-call +;; "-kill-runs" +;; "kill runs" +;; (lambda (target runname keys keyvals) +;; (operate-on 'kill-runs mode: #f) +;; ))) +;; +;; (if (args:get-arg "-kill-rerun") +;; (let* ((target-patt (common:args-get-target)) +;; (runname-patt (args:get-arg "-runname"))) +;; (cond ((not target-patt) +;; (debug:print-error 0 *default-log-port* "Missing target, must specify target for -kill-rerun with -target ") +;; (exit 1)) +;; ((not runname-patt) +;; (debug:print-error 0 *default-log-port* "Missing runname, must specify runname for -kill-rerun with -runname ") +;; (exit 1)) +;; ((string-search "[ ,%]" target-patt) +;; (debug:print-error 0 *default-log-port* "Invalid target ["target-patt"], must specify exact target (no wildcards) for -kill-rerun with -target ") +;; (exit 1)) +;; ((string-search "[ ,%]" runname-patt) +;; (debug:print-error 0 *default-log-port* "Invalid runname ["runname-patt"], must specify exact runname (no wildcards) for -kill-rerun with -runname ") +;; (exit 1)) +;; (else +;; (general-run-call +;; "-kill-runs" +;; "kill runs" +;; (lambda (target runname keys keyvals) +;; (operate-on 'kill-runs mode: #f) +;; )) +;; +;; (thread-sleep! 15)) +;; ;; fall thru and let "-run" loop fire +;; ))) +;; +;; +;; (if (args:get-arg "-remove-runs") +;; (general-run-call +;; "-remove-runs" +;; "remove runs" +;; (lambda (target runname keys keyvals) +;; (operate-on 'remove-runs mode: (if (args:get-arg "-keep-records") +;; 'remove-data-only +;; 'remove-all))))) +;; +;; (if (args:get-arg "-remove-keep") +;; (general-run-call +;; "-remove-keep" +;; "remove keep" +;; (lambda (target runname keys keyvals) +;; (let ((actions (map string->symbol +;; (string-split +;; (or (args:get-arg "-actions") +;; "print") +;; ",")))) ;; default to printing the output +;; (runs:remove-all-but-last-n-runs-per-target target runname +;; (string->number (args:get-arg "-remove-keep")) +;; actions: actions))))) +;; +;; (if (args:get-arg "-set-state-status") +;; (general-run-call +;; "-set-state-status" +;; "set state and status" +;; (lambda (target runname keys keyvals) +;; (operate-on 'set-state-status)))) +;; +;; (if (or (args:get-arg "-set-run-status") +;; (args:get-arg "-get-run-status")) +;; (general-run-call +;; "-set-run-status" +;; "set run status" +;; (lambda (target runname keys keyvals) +;; (let* ((runsdat (rmt:get-runs-by-patt keys runname +;; (common:args-get-target) +;; #f #f #f #f)) +;; (header (vector-ref runsdat 0)) +;; (rows (vector-ref runsdat 1))) +;; (if (null? rows) +;; (begin +;; (debug:print-info 0 *default-log-port* "No matching run found.") +;; (exit 1)) +;; (let* ((row (car (vector-ref runsdat 1))) +;; (run-id (db:get-value-by-header row header "id"))) +;; (if (args:get-arg "-set-run-status") +;; (rmt:set-run-status run-id (args:get-arg "-set-run-status") msg: (args:get-arg "-m")) +;; (print (rmt:get-run-status run-id)) +;; ))))))) +;; +;; ;;====================================================================== +;; ;; Query runs +;; ;;====================================================================== +;; +;; ;; -fields runs:id,target,runname,comment+tests:id,testname,item_path+steps +;; ;; +;; ;; csi> (extract-fields-constraints "runs:id,target,runname,comment+tests:id,testname,item_path+steps") +;; ;; => (("runs" "id" "target" "runname" "comment") ("tests" "id" "testname" "item_path") ("steps")) +;; ;; +;; ;; NOTE: remember that the cdr will be the list you expect (cdr ("runs" "id" "target" "runname" "comment")) => ("id" "target" "runname" "comment") +;; ;; and so alist-ref will yield what you expect +;; ;; +;; (define (extract-fields-constraints fields-spec) +;; (map (lambda (table-spec) ;; runs:id,target,runname +;; (let ((dat (string-split table-spec ":"))) ;; ("runs" "id,target,runname") +;; (if (> (length dat) 1) +;; (cons (car dat)(string-split (cadr dat) ",")) ;; "id,target,runname" +;; dat))) +;; (string-split fields-spec "+"))) +;; +;; (define (get-value-by-fieldname datavec test-field-index fieldname) +;; (let ((indx (hash-table-ref/default test-field-index fieldname #f))) +;; (if indx +;; (if (>= indx (vector-length datavec)) +;; #f ;; index too high, should raise an error I suppose +;; (vector-ref datavec indx)) +;; #f))) +;; +;; +;; +;; +;; +;; (when (args:get-arg "-testdata-csv") +;; (if (launch:setup) +;; (let* ((keys (rmt:get-keys)) ;; (db:get-keys dbstruct)) +;; (runpatt (or (args:get-arg "-runname") "%")) +;; (testpatt (common:args-get-testpatt #f)) +;; (datapatt (args:get-arg "-testdata-csv")) +;; (match-data (string-match "^([^/]+)/(.*)" (args:get-arg "-testdata-csv"))) +;; (categorypatt (if match-data (list-ref match-data 1) "%")) +;; (setvarpatt (if match-data +;; (list-ref match-data 2) +;; (args:get-arg "-testdata-csv"))) +;; (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") +;; (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) +;; (header (db:get-header runsdat)) +;; (access-mode (db:get-access-mode)) +;; (testpatt (common:args-get-testpatt #f)) +;; (fields-spec (if (args:get-arg "-fields") +;; (extract-fields-constraints (args:get-arg "-fields")) +;; (list (cons "runs" (append keys (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count"))) +;; (cons "tests" db:test-record-fields) ;; "id" "testname" "test_path") +;; (list "steps" "id" "stepname")))) +;; (tests-spec (let ((t (alist-ref "tests" fields-spec equal?))) +;; (if (and t (null? t)) ;; all fields +;; db:test-record-fields +;; t))) +;; (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) +;; (test-field-index (make-hash-table)) +;; (runs (db:get-rows runsdat)) +;; ) +;; (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec +;; (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec))) +;; (if (null? invalid-tests-spec) +;; ;; generate the lookup map test-field-name => index-number +;; (let loop ((hed (car adj-tests-spec)) +;; (tal (cdr adj-tests-spec)) +;; (idx 0)) +;; (hash-table-set! test-field-index hed idx) +;; (if (not (null? tal))(loop (car tal)(cdr tal)(+ idx 1)))) +;; (begin +;; (debug:print-error 0 *default-log-port* "Invalid test fields specified: " (string-intersperse invalid-tests-spec ", ")) +;; (exit))))) +;; (let* ((table-header (string-split "target,run,test,itempath,category,var,value,comment" ",")) +;; (table-rows +;; (apply append (map +;; (lambda (run) +;; (let* ((target (string-intersperse (map (lambda (x) +;; (db:get-value-by-header run header x)) +;; keys) "/")) +;; (statuses (string-split (or (args:get-arg "-status") "") ",")) +;; (run-id (db:get-value-by-header run header "id")) +;; (runname (db:get-value-by-header run header "runname")) +;; (states (string-split (or (args:get-arg "-state") "") ",")) +;; (tests (if tests-spec +;; (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc +;; ;; use qryvals if test-spec provided +;; (if tests-spec +;; (string-intersperse adj-tests-spec ",") +;; ;; db:test-record-fields +;; #f) +;; #f +;; 'normal) +;; '()))) +;; (apply append +;; (map +;; (lambda (test) +;; (let* ( +;; (test-id (if (member "id" tests-spec)(get-value-by-fieldname test test-field-index "id" ) #f)) ;; (db:test-get-id test)) +;; (testname (if (member "testname" tests-spec)(get-value-by-fieldname test test-field-index "testname" ) #f)) ;; (db:test-get-testname test)) +;; (itempath (if (member "item_path" tests-spec)(get-value-by-fieldname test test-field-index "item_path" ) #f)) ;; (db:test-get-item-path test)) +;; (fullname (conc testname +;; (if (equal? itempath "") +;; "" +;; (conc "/" itempath )))) +;; (testdat-raw (map vector->list (rmt:read-test-data-varpatt run-id test-id categorypatt setvarpatt))) +;; (testdat (filter +;; (lambda (x) +;; (not (equal? "logpro" +;; (list-ref x 10)))) +;; testdat-raw))) +;; (map +;; (lambda (item) +;; (receive (id test_id category +;; variable value expected +;; tol units comment status type) +;; (apply values item) +;; (list target runname testname itempath category variable value comment))) +;; testdat))) +;; tests)))) +;; runs)))) +;; (print (string-join table-header ",")) +;; (for-each (lambda(table-row) +;; (print (string-join (map ->string table-row) ","))) +;; +;; +;; table-rows)))) +;; (set! *didsomething* #t) +;; (bdat-time-to-exit-set! *bdat* #t)) +;; +;; +;; +;; ;; NOTE: list-runs and list-db-targets operate on local db!!! +;; ;; +;; ;; IDEA: megatest list -runname blah% ... +;; ;; +;; (if (or (args:get-arg "-list-runs") +;; (args:get-arg "-list-db-targets")) +;; (if (launch:setup) +;; (let* (;; (dbstruct (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local"))) +;; (runpatt (args:get-arg "-list-runs")) +;; (access-mode (db:get-access-mode)) +;; (testpatt (common:args-get-testpatt #f)) +;; ;; (if (args:get-arg "-testpatt") +;; ;; (args:get-arg "-testpatt") +;; ;; "%")) +;; (keys (rmt:get-keys)) ;; (db:get-keys dbstruct)) +;; ;; (runsdat (db:get-runs dbstruct runpatt #f #f '())) +;; ;; (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") (common:args-get-target) ;; (db:get-runs-by-patt dbstruct keys (or runpatt "%") (common:args-get-target) +;; ;; #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) +;; (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") +;; (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) +;; (runstmp (db:get-rows runsdat)) +;; (header (db:get-header runsdat)) +;; ;; this is "-since" support. This looks at last mod times of .db files +;; ;; and collects those modified since the -since time. +;; (runs runstmp) +;; ;; (if (and (not (null? runstmp)) +;; ;; (args:get-arg "-since")) +;; ;; (let ((changed-ids (db:get-changed-run-ids (string->number (args:get-arg "-since"))))) +;; ;; (let loop ((hed (car runstmp)) +;; ;; (tal (cdr runstmp)) +;; ;; (res '())) +;; ;; (let ((new-res (if (member (db:get-value-by-header hed header "id") changed-ids) +;; ;; (cons hed res) +;; ;; res))) +;; ;; (if (null? tal) +;; ;; (reverse new-res) +;; ;; (loop (car tal)(cdr tal) new-res))))) +;; ;; runstmp)) +;; (db-targets (args:get-arg "-list-db-targets")) +;; (seen (make-hash-table)) +;; (dmode (let ((d (args:get-arg "-dumpmode"))) ;; json, sexpr +;; (if d (string->symbol d) #f))) +;; (data (make-hash-table)) +;; (fields-spec (if (args:get-arg "-fields") +;; (extract-fields-constraints (args:get-arg "-fields")) +;; (list (cons "runs" (append keys (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count"))) +;; (cons "tests" db:test-record-fields) ;; "id" "testname" "test_path") +;; (list "steps" "id" "stepname")))) +;; (runs-spec (let ((r (alist-ref "runs" fields-spec equal?))) ;; the check is now unnecessary +;; (if (and r (not (null? r))) r (list "id" )))) +;; (tests-spec (let ((t (alist-ref "tests" fields-spec equal?))) +;; (if (and t (null? t)) ;; all fields +;; db:test-record-fields +;; t))) +;; (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) ;; '("id")))) +;; (steps-spec (alist-ref "steps" fields-spec equal?)) +;; (test-field-index (make-hash-table))) +;; (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec +;; (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec))) +;; (if (null? invalid-tests-spec) +;; ;; generate the lookup map test-field-name => index-number +;; (let loop ((hed (car adj-tests-spec)) +;; (tal (cdr adj-tests-spec)) +;; (idx 0)) +;; (hash-table-set! test-field-index hed idx) +;; (if (not (null? tal))(loop (car tal)(cdr tal)(+ idx 1)))) +;; (begin +;; (debug:print-error 0 *default-log-port* "Invalid test fields specified: " (string-intersperse invalid-tests-spec ", ")) +;; (exit))))) +;; ;; Each run +;; (for-each +;; (lambda (run) +;; (let ((targetstr (string-intersperse (map (lambda (x) +;; (db:get-value-by-header run header x)) +;; keys) "/"))) +;; (if db-targets +;; (if (not (hash-table-ref/default seen targetstr #f)) +;; (begin +;; (hash-table-set! seen targetstr #t) +;; ;; (print "[" targetstr "]")))) +;; (if (not dmode) +;; (print targetstr) +;; (hash-table-set! data "targets" (cons targetstr (hash-table-ref/default data "targets" '()))) +;; ))) +;; (let* ((run-id (db:get-value-by-header run header "id")) +;; (runname (db:get-value-by-header run header "runname")) +;; (states (string-split (or (args:get-arg "-state") "") ",")) +;; (statuses (string-split (or (args:get-arg "-status") "") ",")) +;; (tests (if tests-spec +;; (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc +;; ;; use qryvals if test-spec provided +;; (if tests-spec +;; (string-intersperse adj-tests-spec ",") +;; ;; db:test-record-fields +;; #f) +;; #f +;; 'normal) +;; '()))) +;; (case dmode +;; ((json ods sexpr) +;; (if runs-spec +;; (for-each +;; (lambda (field-name) +;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header field-name)) targetstr runname "meta" field-name)) +;; runs-spec))) +;; ;; (mutils:hierhash-set! data (db:get-value-by-header run header "status") targetstr runname "meta" "status" ) +;; ;; (mutils:hierhash-set! data (db:get-value-by-header run header "state") targetstr runname "meta" "state" ) +;; ;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header "id")) targetstr runname "meta" "id" ) +;; ;; (mutils:hierhash-set! data (db:get-value-by-header run header "event_time") targetstr runname "meta" "event_time" ) +;; ;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment") targetstr runname "meta" "comment" ) +;; ;; ;; add last entry twice - seems to be a bug in hierhash? +;; ;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment") targetstr runname "meta" "comment" ) +;; (else +;; (if (null? runs-spec) +;; (print "Run: " targetstr "/" runname +;; " status: " (db:get-value-by-header run header "state") +;; " run-id: " run-id ", number tests: " (length tests) +;; " event_time: " (db:get-value-by-header run header "event_time")) +;; (begin +;; (if (not (member "target" runs-spec)) +;; ;; (display (conc "Target: " targetstr)) +;; (display (conc "Run: " targetstr "/" runname " "))) +;; (for-each +;; (lambda (field-name) +;; (if (equal? field-name "target") +;; (display (conc "target: " targetstr " ")) +;; (display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " ")))) +;; runs-spec) +;; (newline))))) +;; +;; (for-each +;; (lambda (test) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-error 0 *default-log-port* "Bad data in test record? " test) +;; (debug:print-error 5 *default-log-port* "exn=" (condition->list exn)) +;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) +;; (print-call-chain (current-error-port))) +;; (let* ((test-id (if (member "id" tests-spec)(get-value-by-fieldname test test-field-index "id" ) #f)) ;; (db:test-get-id test)) +;; (testname (if (member "testname" tests-spec)(get-value-by-fieldname test test-field-index "testname" ) #f)) ;; (db:test-get-testname test)) +;; (itempath (if (member "item_path" tests-spec)(get-value-by-fieldname test test-field-index "item_path" ) #f)) ;; (db:test-get-item-path test)) +;; (comment (if (member "comment" tests-spec)(get-value-by-fieldname test test-field-index "comment" ) #f)) ;; (db:test-get-comment test)) +;; (tstate (if (member "state" tests-spec)(get-value-by-fieldname test test-field-index "state" ) #f)) ;; (db:test-get-state test)) +;; (tstatus (if (member "status" tests-spec)(get-value-by-fieldname test test-field-index "status" ) #f)) ;; (db:test-get-status test)) +;; (event-time (if (member "event_time" tests-spec)(get-value-by-fieldname test test-field-index "event_time" ) #f)) ;; (db:test-get-event_time test)) +;; (rundir (if (member "rundir" tests-spec)(get-value-by-fieldname test test-field-index "rundir" ) #f)) ;; (db:test-get-rundir test)) +;; (final_logf (if (member "final_logf" tests-spec)(get-value-by-fieldname test test-field-index "final_logf" ) #f)) ;; (db:test-get-final_logf test)) +;; (run_duration (if (member "run_duration" tests-spec)(get-value-by-fieldname test test-field-index "run_duration") #f)) ;; (db:test-get-run_duration test)) +;; (fullname (conc testname +;; (if (equal? itempath "") +;; "" +;; (conc "(" itempath ")"))))) +;; (case dmode +;; ((json ods sexpr) +;; (if tests-spec +;; (for-each +;; (lambda (field-name) +;; (mutils:hierhash-set! data (get-value-by-fieldname test test-field-index field-name) targetstr runname "data" (conc test-id) field-name)) +;; tests-spec))) +;; ;; ;; (mutils:hierhash-set! data fullname targetstr runname "data" (conc test-id) "tname" ) +;; ;; (mutils:hierhash-set! data testname targetstr runname "data" (conc test-id) "testname" ) +;; ;; (mutils:hierhash-set! data itempath targetstr runname "data" (conc test-id) "itempath" ) +;; ;; (mutils:hierhash-set! data comment targetstr runname "data" (conc test-id) "comment" ) +;; ;; (mutils:hierhash-set! data tstate targetstr runname "data" (conc test-id) "state" ) +;; ;; (mutils:hierhash-set! data tstatus targetstr runname "data" (conc test-id) "status" ) +;; ;; (mutils:hierhash-set! data rundir targetstr runname "data" (conc test-id) "rundir" ) +;; ;; (mutils:hierhash-set! data final_logf targetstr runname "data" (conc test-id) "final_logf") +;; ;; (mutils:hierhash-set! data run_duration targetstr runname "data" (conc test-id) "run_duration") +;; ;; (mutils:hierhash-set! data event-time targetstr runname "data" (conc test-id) "event_time") +;; ;; ;; add last entry twice - seems to be a bug in hierhash? +;; ;; (mutils:hierhash-set! data event-time targetstr runname "data" (conc test-id) "event_time") +;; ;; ) +;; (else +;; (if (and tstate tstatus event-time) +;; (format #t +;; " Test: ~25a State: ~15a Status: ~15a Runtime: ~5@as Time: ~22a Host: ~10a\n" +;; (if fullname fullname "") +;; (if tstate tstate "") +;; (if tstatus tstatus "") +;; (get-value-by-fieldname test test-field-index "run_duration");;(if test (db:test-get-run_duration test) "") +;; (if event-time event-time "") +;; (get-value-by-fieldname test test-field-index "host")) ;;(if test (db:test-get-host test)) "") +;; (print " Test: " fullname +;; (if tstate (conc " State: " tstate) "") +;; (if tstatus (conc " Status: " tstatus) "") +;; (if (get-value-by-fieldname test test-field-index "run_duration") +;; (conc " Runtime: " (get-value-by-fieldname test test-field-index "run_duration")) +;; "") +;; (if event-time (conc " Time: " event-time) "") +;; (if (get-value-by-fieldname test test-field-index "host") +;; (conc " Host: " (get-value-by-fieldname test test-field-index "host")) +;; ""))) +;; (if (not (or (equal? (get-value-by-fieldname test test-field-index "status") "PASS") +;; (equal? (get-value-by-fieldname test test-field-index "status") "WARN") +;; (equal? (get-value-by-fieldname test test-field-index "state") "NOT_STARTED"))) +;; (begin +;; (print (if (get-value-by-fieldname test test-field-index "cpuload") +;; (conc " cpuload: " (get-value-by-fieldname test test-field-index "cpuload")) +;; "") ;; (db:test-get-cpuload test) +;; (if (get-value-by-fieldname test test-field-index "diskfree") +;; (conc "\n diskfree: " (get-value-by-fieldname test test-field-index "diskfree")) ;; (db:test-get-diskfree test) +;; "") +;; (if (get-value-by-fieldname test test-field-index "uname") +;; (conc "\n uname: " (get-value-by-fieldname test test-field-index "uname")) ;; (db:test-get-uname test) +;; "") +;; (if (get-value-by-fieldname test test-field-index "rundir") +;; (conc "\n rundir: " (get-value-by-fieldname test test-field-index "rundir")) ;; (db:test-get-rundir test) +;; "") +;; ;; "\n rundir: " (get-value-by-fieldname test test-field-index "") ;; (sdb:qry 'getstr ;; (filedb:get-path *fdb* +;; ;; (db:test-get-rundir test) ;; ) +;; ) +;; ;; Each test +;; ;; DO NOT remote run +;; (let ((steps (rmt:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) +;; (for-each +;; (lambda (step) +;; (format #t +;; " Step: ~20a State: ~10a Status: ~10a Time ~22a\n" +;; (tdb:step-get-stepname step) +;; (tdb:step-get-state step) +;; (tdb:step-get-status step) +;; (tdb:step-get-event_time step))) +;; steps))))))))) +;; (if (args:get-arg "-sort") +;; (sort tests +;; (lambda (a-test b-test) +;; (let* ((key (args:get-arg "-sort")) +;; (first (get-value-by-fieldname a-test test-field-index key)) +;; (second (get-value-by-fieldname b-test test-field-index key))) +;; ((cond +;; ((and (number? first)(number? second)) <) +;; ((and (string? first)(string? second)) string<=?) +;; (else equal?)) +;; first second)))) +;; tests)))))) +;; runs) +;; (case dmode +;; ((json) (json-write data)) +;; ((sexpr) (pp (common:to-alist data)))) +;; (let* ((metadat-fields (delete-duplicates +;; (append keys '( "runname" "time" "owner" "pass_count" "fail_count" "state" "status" "comment" "id")))) +;; (run-fields '( +;; "testname" +;; "item_path" +;; "state" +;; "status" +;; "comment" +;; "event_time" +;; "host" +;; "run_id" +;; "run_duration" +;; "attemptnum" +;; "id" +;; "archived" +;; "diskfree" +;; "cpuload" +;; "final_logf" +;; "shortdir" +;; "rundir" +;; "uname" +;; ) +;; ) +;; (newdat (common:to-alist data)) +;; (allrundat (if (null? newdat) +;; '() +;; (car (map cdr newdat)))) ;; (car (map cdr (car (map cdr newdat))))) +;; (runs (append +;; (list "runs" ;; sheetname +;; metadat-fields) +;; (map (lambda (run) +;; ;; (print "run: " run) +;; (let* ((runname (car run)) +;; (rundat (cdr run)) +;; (metadat (let ((tmp (assoc "meta" rundat))) +;; (if tmp (cdr tmp) #f)))) +;; ;; (print "runname: " runname "\n\nrundat: " )(pp rundat)(print "\n\nmetadat: ")(pp metadat) +;; (if metadat +;; (map (lambda (field) +;; (let ((tmp (assoc field metadat))) +;; (if tmp (cdr tmp) ""))) +;; metadat-fields) +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: meta data for run " runname " not found") +;; '())))) +;; allrundat))) +;; ;; '( ( "target" ( "runname" ( "data" ( "runid" ( "id . "37" ) ( ... )))) +;; (run-pages (map (lambda (targdat) +;; (let* ((target (car targdat)) +;; (runsdat (cdr targdat))) +;; (if runsdat +;; (map (lambda (rundat) +;; (let* ((runname (car rundat)) +;; (rundat (cdr rundat)) +;; (testsdat (let ((tmp (assoc "data" rundat))) +;; (if tmp (cdr tmp) #f)))) +;; (if testsdat +;; (let ((tests (map (lambda (test) +;; (let* ((test-id (car test)) +;; (test-dat (cdr test))) +;; (map (lambda (field) +;; (let ((tmp (assoc field test-dat))) +;; (if tmp (cdr tmp) ""))) +;; run-fields))) +;; testsdat))) +;; ;; (print "Target: " target "/" runname " tests:") +;; ;; (pp tests) +;; (cons (conc target "/" runname) +;; (cons (list (conc target "/" runname)) +;; (cons '() +;; (cons run-fields tests))))) +;; (begin +;; (debug:print 4 *default-log-port* "WARNING: run " target "/" runname " appears to have no data") +;; ;; (pp rundat) +;; '())))) +;; runsdat) +;; '()))) +;; newdat)) ;; we use newdat to get target +;; (sheets (filter (lambda (x) +;; (not (null? x))) +;; (cons runs (map car run-pages))))) +;; ;; (print "allrundat:") +;; ;; (pp allrundat) +;; ;; (print "runs:") +;; ;; (pp runs) +;; ;(print "sheets: ") +;; ;; (pp sheets) +;; (if (eq? dmode 'ods) +;; (let* ((tempdir (conc "/tmp/" (current-user-name) "/" (pseudo-random-integer 10000) "_" (current-process-id))) +;; (outputfile (or (args:get-arg "-o") "out.ods")) +;; (ouf (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path? +;; outputfile +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") +;; (conc (current-directory) "/" outputfile))))) +;; (create-directory tempdir #t) +;; (ods:list->ods tempdir ouf sheets)))) +;; ;; (system (conc "rm -rf " tempdir)) +;; (set! *didsomething* #t) +;; (bdat-time-to-exit-set! *bdat* #t) +;; ) ;; end if true branch (end of a let) +;; ) ;; end if +;; ) ;; end if -list-runs +;; +;; ;; list-waivers +;; (if (and (args:get-arg "-list-waivers") +;; (launch:setup)) +;; (let* ((runpatt (or (args:get-arg "-runname") "%")) +;; (testpatt (common:args-get-testpatt #f)) +;; (keys (rmt:get-keys)) +;; (runsdat (rmt:get-runs-by-patt +;; keys runpatt +;; (common:args-get-target) #f #f +;; '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) +;; (runs (db:get-rows runsdat)) +;; (header (db:get-header runsdat)) +;; (results (make-hash-table)) ;; [target] ( (testname/itempath . "comment") ... ) +;; (addtest (lambda (target testname itempath comment) +;; (hash-table-set! results target (cons (cons (conc testname "/" itempath) comment) +;; (hash-table-ref/default results target '()))))) +;; (last-target #f)) +;; (for-each +;; (lambda (run) +;; (let* ((run-id (db:get-value-by-header run header "id")) +;; (target (rmt:get-target run-id)) +;; (runname (db:get-value-by-header run header "runname")) +;; (tests (rmt:get-tests-for-run +;; run-id testpatt '("COMPLETED") '("WAIVED") #f #f #f 'testname 'asc ;; use qryvals if test-spec provided +;; #f #f #f))) +;; (if (not (equal? target last-target)) +;; (print "[" target "]")) +;; (set! last-target target) +;; (print "# " runname) +;; (for-each +;; (lambda (testdat) +;; (let* ((testfullname (conc (db:test-get-testname testdat) +;; (if (equal? "" (db:test-get-item-path testdat)) +;; "" +;; (conc "/" (db:test-get-item-path testdat))) +;; ))) +;; (print testfullname " " (db:test-get-comment testdat)))) +;; tests))) +;; runs) +;; (set! *didsomething* #t))) +;; +;; +;; ;; get lock in db for full run for this directory +;; ;; for all tests with deps +;; ;; walk tree of tests to find head tasks +;; ;; add head tasks to task queue +;; ;; add dependant tasks to task queue +;; ;; add remaining tasks to task queue +;; ;; for each task in task queue +;; ;; if have adequate resources +;; ;; launch task +;; ;; else +;; ;; put task in deferred queue +;; ;; if still ok to run tasks +;; ;; process deferred tasks per above steps +;; +;; ;; run all tests are are Not COMPLETED and PASS or CHECK +;; (if (or (args:get-arg "-runall") +;; (args:get-arg "-run") +;; (args:get-arg "-rerun-clean") +;; (args:get-arg "-rerun-all") +;; (args:get-arg "-runtests") +;; (args:get-arg "-kill-rerun")) +;; (let ((need-clean (or (args:get-arg "-rerun-clean") +;; (args:get-arg "-rerun-all"))) +;; (orig-cmdline (string-intersperse (argv) " "))) +;; (general-run-call +;; "-runall" +;; "run all tests" +;; (lambda (target runname keys keyvals) +;; (if (or (string-search "%" target) +;; (string-search "%" runname)) ;; we are being asked to re-run multiple runs +;; (let* ((run-specs (rmt:simple-get-runs runname #f #f target #f))) ;; list of simple-run records +;; (debug:print-info 0 *default-log-port* "Pattern supplied for target or runname with " +;; (length run-specs) " matches round. Running each in turn.") +;; (if (null? run-specs) +;; (debug:print 0 *default-log-port* "WARNING: No runs match target " target " and runname " runname)) +;; (for-each (lambda (spec) +;; (let* ((precmd (if (args:get-arg "-precmd")(conc (args:get-arg "-precmd") " ") "")) +;; (newcmdline (conc +;; precmd +;; (string-substitute +;; (conc "target " target) +;; (conc "target " (simple-run-target spec)) +;; (string-substitute +;; (conc "runname " runname) +;; (conc "runname " (simple-run-runname spec)) +;; orig-cmdline))))) +;; (debug:print 0 *default-log-port* "ORIG: " orig-cmdline) +;; (debug:print 0 *default-log-port* "NEW: " newcmdline) +;; (system newcmdline))) +;; run-specs)) +;; (handle-run-requests target runname keys keyvals need-clean)))))) +;; +;; ;;====================================================================== +;; ;; run one test +;; ;;====================================================================== +;; +;; ;; 1. find the config file +;; ;; 2. change to the test directory +;; ;; 3. update the db with "test started" status, set running host +;; ;; 4. process launch the test +;; ;; - monitor the process, update stats in the db every 2^n minutes +;; ;; 5. as the test proceeds internally it calls megatest as each step is +;; ;; started and completed +;; ;; - step started, timestamp +;; ;; - step completed, exit status, timestamp +;; ;; 6. test phone home +;; ;; - if test run time > allowed run time then kill job +;; ;; - if cannot access db > allowed disconnect time then kill job +;; +;; ;; == duplicated == (if (or (args:get-arg "-run")(args:get-arg "-runtests")) +;; ;; == duplicated == (general-run-call +;; ;; == duplicated == "-runtests" +;; ;; == duplicated == "run a test" +;; ;; == duplicated == (lambda (target runname keys keyvals) +;; ;; == duplicated == ;; +;; ;; == duplicated == ;; May or may not implement it this way ... +;; ;; == duplicated == ;; +;; ;; == duplicated == ;; Insert this run into the tasks queue +;; ;; == duplicated == ;; (open-run-close tasks:add tasks:open-db +;; ;; == duplicated == ;; "runtests" +;; ;; == duplicated == ;; user +;; ;; == duplicated == ;; target +;; ;; == duplicated == ;; runname +;; ;; == duplicated == ;; (args:get-arg "-runtests") +;; ;; == duplicated == ;; #f)))) +;; ;; == duplicated == (runs:run-tests target +;; ;; == duplicated == runname +;; ;; == duplicated == (common:args-get-testpatt #f) ;; (args:get-arg "-runtests") +;; ;; == duplicated == user +;; ;; == duplicated == args:arg-hash)))) +;; +;; ;;====================================================================== +;; ;; Rollup into a run +;; ;;====================================================================== +;; +;; ;; (if (args:get-arg "-rollup") +;; ;; (general-run-call +;; ;; "-rollup" +;; ;; "rollup tests" +;; ;; (lambda (target runname keys keyvals) +;; ;; (runs:rollup-run keys +;; ;; keyvals +;; ;; (or (args:get-arg "-runname")(args:get-arg ":runname") ) +;; ;; user)))) +;; +;; ;;====================================================================== +;; ;; Lock or unlock a run +;; ;;====================================================================== +;; +;; (if (or (args:get-arg "-lock")(args:get-arg "-unlock")) +;; (general-run-call +;; (if (args:get-arg "-lock") "-lock" "-unlock") +;; "lock/unlock tests" +;; (lambda (target runname keys keyvals) +;; (runs:handle-locking +;; target +;; keys +;; (or (args:get-arg "-runname")(args:get-arg ":runname") ) +;; (args:get-arg "-lock") +;; (args:get-arg "-unlock") +;; (bdat-user *bdat*))))) +;; +;; ;;====================================================================== +;; ;; Get paths to tests +;; ;;====================================================================== +;; ;; Get test paths matching target, runname, and testpatt +;; (if (or (args:get-arg "-test-files")(args:get-arg "-test-paths")) +;; ;; if we are in a test use the MT_CMDINFO data +;; (if (get-environment-variable "MT_CMDINFO") +;; (let* ((startingdir (current-directory)) +;; (cmdinfo (common:read-encoded-string (get-environment-variable "MT_CMDINFO"))) +;; (transport (assoc/default 'transport cmdinfo)) +;; (testpath (assoc/default 'testpath cmdinfo)) +;; (test-name (assoc/default 'test-name cmdinfo)) +;; (runscript (assoc/default 'runscript cmdinfo)) +;; (db-host (assoc/default 'db-host cmdinfo)) +;; (run-id (assoc/default 'run-id cmdinfo)) +;; (itemdat (assoc/default 'itemdat cmdinfo)) +;; (state (args:get-arg ":state")) +;; (status (args:get-arg ":status")) +;; ;;(target (args:get-arg "-target")) +;; (target (common:args-get-target)) +;; (toppath (assoc/default 'toppath cmdinfo))) +;; (change-directory toppath) +;; (if (not target) +;; (begin +;; (debug:print-error 0 *default-log-port* "-target is required.") +;; (exit 1))) +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, giving up on -test-paths or -test-files, exiting") +;; (exit 1))) +;; (let* ((keys (rmt:get-keys)) +;; ;; db:test-get-paths must not be run remote +;; (paths (tests:test-get-paths-matching keys target (args:get-arg "-test-files")))) +;; (set! *didsomething* #t) +;; (for-each (lambda (path) +;; (if (common:file-exists? path) +;; (print path))) +;; paths))) +;; ;; else do a general-run-call +;; (general-run-call +;; "-test-files" +;; "Get paths to test" +;; (lambda (target runname keys keyvals) +;; (let* ((db #f) +;; ;; DO NOT run remote +;; (paths (tests:test-get-paths-matching keys target (args:get-arg "-test-files")))) +;; (for-each (lambda (path) +;; (print path)) +;; paths)))))) +;; +;; ;;====================================================================== +;; ;; Archive tests +;; ;;====================================================================== +;; ;; Archive tests matching target, runname, and testpatt +;; (if (equal? (args:get-arg "-archive") "replicate-db") +;; (begin +;; ;; check if source +;; ;; check if megatest.db exist +;; (launch:setup) +;; (if (not (args:get-arg "-source")) +;; (begin +;; (debug:print-info 1 *default-log-port* "Missing required argument -source ") +;; (exit 1))) +;; (if (common:file-exists? (conc *toppath* "/megatest.db")) +;; (begin +;; (debug:print-info 1 *default-log-port* "File " (conc *toppath* "/megatest.db") " already exists. Please remove it before trying to replicate db") +;; (exit 1))) +;; (if (and (common:get-db-tmp-area) (> (length (directory (common:get-db-tmp-area) #f)) 0)) +;; (begin +;; (debug:print-info 1 *default-log-port* (common:get-db-tmp-area) " not empty. Please remove it before trying to replicate db") +;; (exit 1))) +;; ;; check if timestamp +;; (let* ((source (args:get-arg "-source")) +;; (src (if (not (equal? (substring source 0 1) "/")) +;; (conc (current-directory) "/" source) +;; source)) +;; (ts (if (args:get-arg "-time-stamp") (args:get-arg "-time-stamp") "latest"))) +;; (if (common:directory-exists? src) +;; (begin +;; (archive:restore-db src ts) +;; (set! *didsomething* #t)) +;; (begin +;; (debug:print-error 1 *default-log-port* "Path " source " not found") +;; (exit 1)))))) +;; ;; else do a general-run-call +;; (if (and (args:get-arg "-archive") (not (equal? (args:get-arg "-archive") "replicate-db"))) +;; (begin +;; ;; for the archive get we need to preserve the starting dir as part of the target path +;; (if (and (args:get-arg "-dest") +;; (not (equal? (substring (args:get-arg "-dest") 0 1) "/"))) +;; (let ((newpath (conc (current-directory) "/" (args:get-arg "-dest")))) +;; (debug:print-info 1 *default-log-port* "Preserving original path to destination, was " (args:get-arg "-dest") ", now " newpath) +;; (hash-table-set! args:arg-hash "-dest" newpath))) +;; (general-run-call +;; "-archive" +;; "Archive" +;; (lambda (target runname keys keyvals) +;; (operate-on 'archive target-in: target runname-in: runname ))))) +;; +;; ;;====================================================================== +;; ;; Extract a spreadsheet from the runs database +;; ;;====================================================================== +;; +;; ;; TODO: Reenable this functionality +;; +;; #;(if (args:get-arg "-extract-ods") +;; (general-run-call +;; "-extract-ods" +;; "Make ods spreadsheet" +;; (lambda (target runname keys keyvals) +;; (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: #t)) +;; (outputfile (args:get-arg "-extract-ods")) +;; (runspatt (or (args:get-arg "-runname")(args:get-arg ":runname"))) +;; (pathmod (args:get-arg "-pathmod"))) +;; ;; (keyvalalist (keys->alist keys "%"))) +;; (debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals) +;; (db:extract-ods-file dbstruct outputfile keyvals (if runspatt runspatt "%") pathmod) +;; (db:close-all dbstruct) +;; (set! *didsomething* #t))))) +;; +;; ;;====================================================================== +;; ;; execute the test +;; ;; - gets called on remote host +;; ;; - receives info from the -execute param +;; ;; - passes info to steps via MT_CMDINFO env var (future is to use a dot file) +;; ;; - gathers host info and +;; ;;====================================================================== +;; +;; (if (args:get-arg "-execute") +;; (begin +;; (launch:execute (args:get-arg "-execute")) +;; (set! *didsomething* #t))) +;; +;; ;;====================================================================== +;; ;; recover from a test where the managing mtest was killed but the underlying +;; ;; process might still be salvageable +;; ;;====================================================================== +;; +;; (if (args:get-arg "-recover-test") +;; (let* ((params (string-split (args:get-arg "-recover-test") ","))) +;; (if (> (length params) 1) ;; run-id and test-id +;; (let ((run-id (string->number (car params))) +;; (test-id (string->number (cadr params)))) +;; (if (and run-id test-id) +;; (begin +;; (launch:recover-test run-id test-id) +;; (set! *didsomething* #t)) +;; (begin +;; (debug:print-error 0 *default-log-port* "bad run-id or test-id, must be integers") +;; (exit 1))))))) +;; +;; (if (args:get-arg "-step") +;; (begin +;; (thread-sleep! 1.5) +;; (megatest:step +;; (args:get-arg "-step") +;; (or (args:get-arg "-state")(args:get-arg ":state")) +;; (or (args:get-arg "-status")(args:get-arg ":status")) +;; (args:get-arg "-setlog") +;; (args:get-arg "-m")) +;; ;; (if db (sqlite3:finalize! db)) +;; (set! *didsomething* #t) +;; (thread-sleep! 1.5))) +;; +;; (if (or (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status +;; ;; (not (args:get-arg "-step"))) ;; -setlog may have been processed already in the "-step" previous +;; ;; NEW POLICY - -setlog sets test overall log on every call. +;; (args:get-arg "-set-toplog") +;; (args:get-arg "-test-status") +;; (args:get-arg "-set-values") +;; (args:get-arg "-load-test-data") +;; (args:get-arg "-runstep") +;; (args:get-arg "-summarize-items")) +;; (if (not (get-environment-variable "MT_CMDINFO")) +;; (begin +;; (debug:print-error 0 *default-log-port* "MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!") +;; (exit 5)) +;; (let* ((startingdir (current-directory)) +;; (cmdinfo (common:read-encoded-string (get-environment-variable "MT_CMDINFO"))) +;; (transport (assoc/default 'transport cmdinfo)) +;; (testpath (assoc/default 'testpath cmdinfo)) +;; (test-name (assoc/default 'test-name cmdinfo)) +;; (runscript (assoc/default 'runscript cmdinfo)) +;; (db-host (assoc/default 'db-host cmdinfo)) +;; (run-id (assoc/default 'run-id cmdinfo)) +;; (test-id (assoc/default 'test-id cmdinfo)) +;; (itemdat (assoc/default 'itemdat cmdinfo)) +;; (work-area (assoc/default 'work-area cmdinfo)) +;; (db #f) ;; (open-db)) +;; (state (args:get-arg ":state")) +;; (status (args:get-arg ":status")) +;; (stepname (args:get-arg "-step"))) +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; +;; (if (args:get-arg "-runstep")(debug:print-info 1 *default-log-port* "Running -runstep, first change to directory " work-area)) +;; (change-directory work-area) +;; ;; can setup as client for server mode now +;; ;; (client:setup) +;; +;; (if (args:get-arg "-load-test-data") +;; ;; has sub commands that are rdb: +;; ;; DO NOT put this one into either rmt: or open-run-close +;; (tdb:load-test-data run-id test-id)) +;; (if (args:get-arg "-setlog") +;; (let ((logfname (args:get-arg "-setlog"))) +;; (rmt:test-set-log! run-id test-id logfname))) +;; (if (args:get-arg "-set-toplog") +;; ;; DO NOT run remote +;; (tests:test-set-toplog! run-id test-name (args:get-arg "-set-toplog"))) +;; (if (args:get-arg "-summarize-items") +;; ;; DO NOT run remote +;; (tests:summarize-items run-id test-id test-name #t)) ;; do force here +;; (if (args:get-arg "-runstep") +;; (if (null? remargs) +;; (begin +;; (debug:print-error 0 *default-log-port* "nothing specified to run!") +;; (if db (sqlite3:finalize! db)) +;; (exit 6)) +;; (let* ((stepname (args:get-arg "-runstep")) +;; (logprofile (args:get-arg "-logpro")) +;; (logfile (conc stepname ".log")) +;; (cmd (if (null? remargs) #f (car remargs))) +;; (params (if cmd (cdr remargs) '())) +;; (exitstat #f) +;; (shell (let ((sh (get-environment-variable "SHELL") )) +;; (if sh +;; (last (string-split sh "/")) +;; "bash"))) +;; (redir (case (string->symbol shell) +;; ((tcsh csh ksh) ">&") +;; ((zsh bash sh ash) "2>&1 >") +;; (else ">&"))) +;; (fullcmd (conc "(" (string-intersperse +;; (cons cmd params) " ") +;; ") " redir " " logfile))) +;; ;; mark the start of the test +;; (rmt:teststep-set-status! run-id test-id stepname "start" "n/a" (args:get-arg "-m") logfile) +;; ;; run the test step +;; (debug:print-info 2 *default-log-port* "Running \"" fullcmd "\" in directory \"" startingdir) +;; (change-directory startingdir) +;; (set! exitstat (system fullcmd)) +;; (set! *globalexitstatus* exitstat) +;; ;; (change-directory testpath) +;; ;; run logpro if applicable ;; (process-run "ls" (list "/foo" "2>&1" "blah.log")) +;; (if logprofile +;; (let* ((htmllogfile (conc stepname ".html")) +;; (oldexitstat exitstat) +;; (cmd (string-intersperse (list "logpro" logprofile htmllogfile "<" logfile ">" (conc stepname "_logpro.log")) " "))) +;; (debug:print-info 2 *default-log-port* "running \"" cmd "\"") +;; (change-directory startingdir) +;; (set! exitstat (system cmd)) +;; (set! *globalexitstatus* exitstat) ;; no necessary +;; (change-directory testpath) +;; (rmt:test-set-log! run-id test-id htmllogfile))) +;; (let ((msg (args:get-arg "-m"))) +;; (rmt:teststep-set-status! run-id test-id stepname "end" exitstat msg logfile)) +;; ))) +;; (if (or (args:get-arg "-test-status") +;; (args:get-arg "-set-values")) +;; (let ((newstatus (cond +;; ((number? status) (if (equal? status 0) "PASS" "FAIL")) +;; ((and (string? status) +;; (string->number status))(if (equal? (string->number status) 0) "PASS" "FAIL")) +;; (else status))) +;; ;; transfer relevant keys into a hash to be passed to test-set-status! +;; ;; could use an assoc list I guess. +;; (otherdata (let ((res (make-hash-table))) +;; (for-each (lambda (key) +;; (if (args:get-arg key) +;; (hash-table-set! res key (args:get-arg key)))) +;; (list ":value" ":tol" ":expected" ":first_err" ":first_warn" ":units" ":category" ":variable")) +;; res))) +;; (if (and (args:get-arg "-test-status") +;; (or (not state) +;; (not status))) +;; (begin +;; (debug:print-error 0 *default-log-port* "You must specify :state and :status with every call to -test-status\n" help) +;; (if (sqlite3:database? db)(sqlite3:finalize! db)) +;; (exit 6))) +;; (let* ((msg (args:get-arg "-m")) +;; (numoth (length (hash-table-keys otherdata)))) +;; ;; Convert to rpc inside the tests:test-set-status! call, not here +;; (tests:test-set-status! run-id test-id state newstatus msg otherdata work-area: work-area)))) +;; (if (sqlite3:database? db)(sqlite3:finalize! db)) +;; (set! *didsomething* #t)))) +;; +;; ;;====================================================================== +;; ;; Various helper commands can go below here +;; ;;====================================================================== +;; +;; (if (or (args:get-arg "-showkeys") +;; (args:get-arg "-show-keys")) +;; (let ((db #f) +;; (keys #f)) +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; (set! keys (rmt:get-keys)) ;; db)) +;; (debug:print 1 *default-log-port* "Keys: " (string-intersperse keys ", ")) +;; (if (sqlite3:database? db)(sqlite3:finalize! db)) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-gui") +;; (begin +;; (debug:print 0 *default-log-port* "Look at the dashboard for now") +;; ;; (megatest-gui) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-create-megatest-area") +;; (begin +;; (genexample:mk-megatest.config) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-create-test") +;; (let ((testname (args:get-arg "-create-test"))) +;; (genexample:mk-megatest-test testname) +;; (set! *didsomething* #t))) +;; +;; ;;====================================================================== +;; ;; Update the database schema, clean up the db +;; ;;====================================================================== +;; +;; ;; TODO: Restore this functionality +;; +;; #;(if (args:get-arg "-rebuild-db") +;; (begin +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; ;; keep this one local +;; ;; (open-run-close patch-db #f) +;; (let ((dbstruct (db:setup #f areapath: *toppath*))) +;; (common:cleanup-db dbstruct full: #t)) +;; (set! *didsomething* #t))) +;; +;; #;(if (args:get-arg "-cleanup-db") +;; (begin +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; (let ((dbstruct (db:setup #f areapath: *toppath*))) +;; (common:cleanup-db dbstruct)) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-mark-incompletes") +;; (begin +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; (runs:find-and-mark-incomplete-and-check-end-of-run #f) +;; (set! *didsomething* #t))) +;; +;; ;;====================================================================== +;; ;; Update the tests meta data from the testconfig files +;; ;;====================================================================== +;; +;; (if (args:get-arg "-update-meta") +;; (begin +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; (runs:update-all-test_meta #f) +;; (set! *didsomething* #t))) +;; +;; ;;====================================================================== +;; ;; Start a repl +;; ;;====================================================================== +;; +;; ;; fakeout readline +;; ;; (include "readline-fix.scm") +;; +;; +;; (when (args:get-arg "-diff-rep") +;; (when (and +;; (not (args:get-arg "-diff-html")) +;; (not (args:get-arg "-diff-email"))) +;; (debug:print 0 *default-log-port* "Must specify -diff-html or -diff-email with -diff-rep") +;; (set! *didsomething* 1) +;; (exit 1)) +;; +;; (let* ((toppath (launch:setup))) +;; (do-diff-report +;; (args:get-arg "-src-target") +;; (args:get-arg "-src-runname") +;; (args:get-arg "-target") +;; (args:get-arg "-runname") +;; (args:get-arg "-diff-html") +;; (args:get-arg "-diff-email")) +;; (set! *didsomething* #t) +;; (exit 0))) +;; +;; (if (or (get-environment-variable "MT_RUNSCRIPT") +;; (args:get-arg "-repl") +;; (args:get-arg "-load")) +;; (let* ((toppath (launch:setup))) +;; +;; ;; (dbstruct (if (and toppath +;; ;; #;(common:on-homehost?)) +;; ;; (db:setup #f) ;; sets up main.db +;; ;; #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) +;; (if *toppath* +;; (cond +;; ((get-environment-variable "MT_RUNSCRIPT") +;; ;; How to run megatest scripts +;; ;; +;; ;; #!/bin/bash +;; ;; +;; ;; export MT_RUNSCRIPT=yes +;; ;; megatest << EOF +;; ;; (print "Hello world") +;; ;; (exit) +;; ;; EOF +;; +;; (repl)) +;; (else +;; (begin +;; ;; (set! *db* dbstruct) +;; ;; (import extras) ;; might not be needed +;; ;; (import chicken.csi) +;; ;; (import readline) +;; #;(import apropos +;; archivemod +;; commonmod +;; configfmod +;; dbmod +;; debugprint +;; ezstepsmod +;; launchmod +;; processmod +;; rmtmod +;; runsmod +;; servermod +;; tasksmod +;; testsmod) +;; +;; (set-history-length! 300) +;; (load-history-from-file ".megatest_history") +;; (current-input-port (make-linenoise-port)) +;; ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ... +;; +;; ;; (if *use-new-readline* +;; ;; (begin +;; ;; (install-history-file (get-environment-variable "HOME") ".megatest_history") ;; [homedir] [filename] [nlines]) +;; ;; (current-input-port (make-readline-port "megatest> "))) +;; ;; (begin +;; ;; (gnu-history-install-file-manager +;; ;; (string-append +;; ;; (or (get-environment-variable "HOME") ".") "/.megatest_history")) +;; ;; (current-input-port (make-gnu-readline-port "megatest> ")))) +;; (if (args:get-arg "-repl") +;; (repl) +;; (load (args:get-arg "-load"))) +;; ;; (db:close-all dbstruct) <= taken care of by on-exit call +;; ) +;; (exit))) +;; (set! *didsomething* #t)))) +;; +;; ;;====================================================================== +;; ;; Wait on a run to complete +;; ;;====================================================================== +;; +;; (if (and (args:get-arg "-run-wait") +;; (not (or (args:get-arg "-run") +;; (args:get-arg "-runtests")))) ;; run-wait is built into runtests now +;; (begin +;; (if (not (launch:setup)) +;; (begin +;; (debug:print 0 *default-log-port* "Failed to setup, exiting") +;; (exit 1))) +;; (operate-on 'run-wait) +;; (set! *didsomething* #t))) +;; +;; ;; ;; ;; redo me ;; Not converted to use dbstruct yet +;; ;; ;; ;; redo me ;; +;; ;; ;; ;; redo me (if (args:get-arg "-convert-to-norm") +;; ;; ;; ;; redo me (let* ((toppath (setup-for-run)) +;; ;; ;; ;; redo me (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t)))) +;; ;; ;; ;; redo me (for-each +;; ;; ;; ;; redo me (lambda (field) +;; ;; ;; ;; redo me (let ((dat '())) +;; ;; ;; ;; redo me (debug:print-info 0 *default-log-port* "Getting data for field " field) +;; ;; ;; ;; redo me (sqlite3:for-each-row +;; ;; ;; ;; redo me (lambda (id val) +;; ;; ;; ;; redo me (set! dat (cons (list id val) dat))) +;; ;; ;; ;; redo me (db:get-db db run-id) +;; ;; ;; ;; redo me (conc "SELECT id," field " FROM tests;")) +;; ;; ;; ;; redo me (debug:print-info 0 *default-log-port* "found " (length dat) " items for field " field) +;; ;; ;; ;; redo me (let ((qry (sqlite3:prepare db (conc "UPDATE tests SET " field "=? WHERE id=?;")))) +;; ;; ;; ;; redo me (for-each +;; ;; ;; ;; redo me (lambda (item) +;; ;; ;; ;; redo me (let ((newval ;; (sdb:qry 'getid +;; ;; ;; ;; redo me (cadr item))) ;; ) +;; ;; ;; ;; redo me (if (not (equal? newval (cadr item))) +;; ;; ;; ;; redo me (debug:print-info 0 *default-log-port* "Converting " (cadr item) " to " newval " for test #" (car item))) +;; ;; ;; ;; redo me (sqlite3:execute qry newval (car item)))) +;; ;; ;; ;; redo me dat) +;; ;; ;; ;; redo me (sqlite3:finalize! qry)))) +;; ;; ;; ;; redo me (db:close-all dbstruct) +;; ;; ;; ;; redo me (list "uname" "rundir" "final_logf" "comment")) +;; ;; ;; ;; redo me (set! *didsomething* #t))) +;; +;; ;; TODO: restore this functionality +;; +;; #;(if (args:get-arg "-import-megatest.db") +;; (begin +;; (db:multi-db-sync +;; (db:setup #f) +;; 'killservers +;; 'dejunk +;; 'adj-testids +;; 'old2new +;; ;; 'new2old +;; ) +;; (set! *didsomething* #t))) +;; +;; #;(when (args:get-arg "-sync-brute-force") +;; ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t)) +;; (set! *didsomething* #t)) +;; +;; #;(if (args:get-arg "-sync-to-megatest.db") +;; (let* ((dbstruct (db:setup #f)) +;; (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct))) +;; (lockfile (conc tmpdbpth ".lock")) +;; (locked (common:simple-file-lock lockfile)) +;; (res (if locked +;; (db:multi-db-sync +;; dbstruct +;; 'new2old) +;; #f))) +;; (if res +;; (begin +;; (common:simple-file-release-lock lockfile) +;; (print "Synced " res " records to megatest.db")) +;; (print "Skipping sync, there is a sync in progress.")) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-sync-to") +;; (let ((toppath (launch:setup))) +;; (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to")) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-list-test-time") +;; (let* ((toppath (launch:setup))) +;; (task:get-test-times) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-list-run-time") +;; (let* ((toppath (launch:setup))) +;; (task:get-run-times) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-generate-html") +;; (let* ((toppath (launch:setup))) +;; (if (tests:create-html-tree #f) +;; (debug:print-info 0 *default-log-port* "HTML output created in " toppath "/lt/page0.html") +;; (debug:print 0 *default-log-port* "Failed to create HTML output in " toppath "/lt/runs-index.html")) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-generate-html-structure") +;; (let* ((toppath (launch:setup))) +;; ;(if (tests:create-html-tree #f) +;; (if (tests:create-html-summary #f) +;; (debug:print-info 0 *default-log-port* "HTML output created in " toppath "/lt/targets.html") +;; (debug:print 0 *default-log-port* "Failed to create HTML output in " toppath "/lt/runs-index.html")) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-syscheck") +;; (begin +;; (mutils:syscheck common:raw-get-remote-host-load +;; server:get-best-guess-address +;; configf:read-config) +;; (set! *didsomething* #t))) +;; +;; (if (args:get-arg "-extract-skeleton") +;; (let* ((toppath (launch:setup))) +;; (genexample:extract-skeleton-area (args:get-arg "-extract-skeleton")) +;; (set! *didsomething* #t))) +;; +;; ;;====================================================================== +;; ;; Exit and clean up +;; ;;====================================================================== +;; +;; (if (not *didsomething*) +;; (debug:print 0 *default-log-port* help) +;; (bdat-time-to-exit-set! *bdat* #t) +;; ) +;; ;;(debug:print-info 13 *default-log-port* "thread-join! watchdog") +;; +;; ;; join the watchdog thread if it has been thread-start!ed (it may not have been started in the case of a server that never enters running state) +;; ;; (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead) +;; ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +;; #;(let* ((watchdog (bdat-watchdog *bdat*))) +;; (if (thread? watchdog) +;; (case (thread-state watchdog) +;; ((ready running blocked sleeping terminated dead) +;; (thread-join! watchdog))))) +;; +;; (bdat-time-to-exit-set! *bdat* #t) +;; +;; (if (not (eq? *globalexitstatus* 0)) +;; (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall")) +;; (begin +;; (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*) +;; (exit 0)) +;; (case *globalexitstatus* +;; ((0)(exit 0)) +;; ((1)(exit 1)) +;; ((2)(exit 2)) +;; (else (exit 3))))) +;; ) +;; +;; ;; (import megatest-main commonmod) +;; ;; (import srfi-18) + + Index: mtver.scm ================================================================== --- mtver.scm +++ mtver.scm @@ -22,8 +22,9 @@ (module mtver * (import scheme chicken.module) -(define megatest-version 1.6584) +;; (define megatest-version 1.6584) +(define megatest-version 2.001) ) Index: pgdb.scm ================================================================== --- pgdb.scm +++ pgdb.scm @@ -12,11 +12,11 @@ chicken.condition chicken.string chicken.sort list-utils - postgresql + ;; postgresql srfi-1 srfi-69 typed-records (prefix dbi dbi:) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -1,7 +1,7 @@ ;;====================================================================== -;; Copyright 2017, Matthew Welland. +;; Copyright 2022, Matthew Welland. ;; ;; This file is part of Megatest. ;; ;; Megatest is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -16,10 +16,14 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== +;; generate entries for ~/.megatestrc with the following +;; +;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u + (declare (unit rmtmod)) (declare (uses apimod)) (declare (uses commonmod)) (declare (uses configfmod)) @@ -30,10 +34,12 @@ (declare (uses mtver)) (declare (uses pgdb)) (declare (uses portloggermod)) (declare (uses servermod)) (declare (uses tasksmod)) +(declare (uses ulex)) +(declare (uses dbmgrmod)) (module rmtmod * (import scheme @@ -54,20 +60,19 @@ chicken.string ;; chicken.tcp chicken.random chicken.time chicken.time.posix - (prefix sqlite3 sqlite3:) directory-utils format ;; http-client ;; intarweb matchable md5 message-digest - nng ;; nanomsg + ;; nng ;; nanomsg (prefix base64 base64:) (prefix sqlite3 sqlite3:) regex s11n ;; spiffy @@ -96,371 +101,21 @@ portloggermod (prefix mtargs args:) servermod stml2 tasksmod - ) - -(defstruct alldat - (areapath #f) - (ulexdat #f) - ) - - -;; (require-extension (srfi 18) extras tcp s11n) -;; -;; -;; (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) -;; -;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) -;; -;; Configurations for server -;; (tcp-buffer-size 2048) -;; (max-connections 2048) - -;; info about me as a server -;; -(defstruct servdat - (host #f) - (port #f) - (uuid #f) - (rep #f) - (dbfile #f) - (api-url #f) - (api-uri #f) - (api-req #f) - (status 'starting) - (trynum 0) ;; count the number of ports we've tried - ) - -(define (servdat->url sdat) - (conc (servdat-host sdat)":"(servdat-port sdat))) - -;; generate entries for ~/.megatestrc with the following -;; -;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u - -(defstruct remotedat - (conns (make-hash-table)) ;; apath/dbname => conndat - ) - -(defstruct conndat - (apath #f) - (dbname #f) - (fullname #f) - (hostport #f) - (ipaddr #f) - (port #f) - (socket #f) - (srvpkt #f) - (srvkey #f) - (lastmsg 0) - (expires 0) - (inport #f) - (outport #f)) - -(define *srvpktspec* - `((server (host . h) - (port . p) - (servkey . k) - (pid . i) - (ipaddr . a) - (dbpath . d)))) - -;;====================================================================== -;; S U P P O R T F U N C T I O N S -;;====================================================================== - -;; replaces *runremote* -(define *remotedat* (make-remotedat)) - -;; -> http://abc.com:900/ -;; -(define (conndat->uri conn entrypoint) - (conc "http://"(conndat-ipaddr conn)":"(conndat-port conn)"/"entrypoint)) - -;; set up the api proc, seems like there should be a better place for this? -(define api-proc (make-parameter conc)) -(api-proc api:process-request) - -;; do we have a connection to apath dbname and -;; is it not expired? then return it -;; -;; else setup a connection -;; -;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception -;; -(define (rmt:get-conn remdat apath dbname) - (let* ((fullname (db:dbname->path apath dbname)) - (conn (hash-table-ref/default (remotedat-conns remdat) fullname #f))) - (if (and conn - (< (current-seconds) (conndat-expires conn))) - conn - #f))) - -(define (rmt:find-main-server apath dbname) - (let* ((pktsdir (get-pkts-dir apath)) - (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) - ;; (dbpath (conc apath "/" dbname)) - (viable-srvs (get-viable-servers all-srvpkts dbname))) - (get-the-server apath viable-srvs))) - - -(define *connstart-mutex* (make-mutex)) -(define *last-main-start* 0) - -;; looks for a connection to main, returns if have and not exired -;; creates new otherwise -;; -;; connections for other servers happens by requesting from main -;; -;; TODO: This is unnecessarily re-creating the record in the hash table -;; -(define (rmt:open-main-connection remdat apath) - (let* ((fullpath (db:dbname->path apath "/.db/main.db")) - (conns (remotedat-conns remdat)) - (conn (hash-table-ref/default conns fullpath #f))) ;; TODO - create call for this - (cond - ((and conn ;; conn is NOT a socket, just saying ... - (< (current-seconds) (conndat-expires conn))) - #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died - ((and conn - (>= (current-seconds)(conndat-expires conn))) - (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") - (if (conndat-socket conn) - (nng-close! (conndat-socket conn))) - (hash-table-set! conns fullpath #f) ;; clean up - (rmt:open-main-connection remdat apath)) - (else - ;; Below we will find or create and connect to main - (let* ((dbname (db:run-id->dbname #f)) - (the-srv (rmt:find-main-server apath dbname)) - (start-main-srv (lambda () ;; call IF there is no the-srv found - (mutex-lock! *connstart-mutex*) - (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server - (begin - (api:run-server-process apath dbname) - (set! *last-main-start* (current-seconds)) - (thread-sleep! 1))) - (mutex-unlock! *connstart-mutex*) - (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries - ))) - (if (not the-srv) ;; have server, try connecting to it - (start-main-srv) - (let* ((srv-addr (server-address the-srv)) ;; need serv - (ipaddr (alist-ref 'ipaddr the-srv)) - (port (alist-ref 'port the-srv)) - (srvkey (alist-ref 'servkey the-srv)) - (fullpath (db:dbname->path apath dbname)) - - (new-the-srv (make-conndat - apath: apath - dbname: dbname - fullname: fullpath - hostport: srv-addr - socket: (open-nn-connection srv-addr) - ipaddr: ipaddr - port: port - srvpkt: the-srv - srvkey: srvkey ;; generated by rmt:get-signature on the server side - lastmsg: (current-seconds) - expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping - ))) - (hash-table-set! conns fullpath new-the-srv))) - #t))))) - -;; NB// remdat is a remotedat struct -;; -(define (rmt:general-open-connection remdat apath dbname #!key (num-tries 5)) - (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") - (let* ((mdbname (db:run-id->dbname #f)) - (fullname (db:dbname->path apath dbname)) - (conns (remotedat-conns remdat)) - (mconn (rmt:get-conn remdat apath mdbname))) - (if (and mconn - (not (debug:print-logger))) - (begin - (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") - (debug:print-logger rmt:log-to-main))) - (cond - ((or (not mconn) ;; no channel open to main? - (< (conndat-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease - (if mconn ;; previously opened - clean up NB// consolidate this with the similar code in open main above - (begin - (debug:print-info 0 *default-log-port* "Clearing out connection to main that has expired.") - (nng-close! (conndat-socket mconn)) - (hash-table-set! conns fullname #f))) - (rmt:open-main-connection remdat apath) - (rmt:general-open-connection remdat apath mdbname)) - ((not (rmt:get-conn remdat apath dbname)) ;; no channel open to dbname? - (let* ((res (rmt:send-receive-real remdat apath mdbname 'get-server `(,apath ,dbname)))) - (case res - ((server-started) - (if (> num-tries 0) - (begin - (thread-sleep! 2) - (rmt:general-open-connection remdat apath dbname num-tries: (- num-tries 1))) - (begin - (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) - (exit 1)))) - (else - (if (list? res) ;; server has been registered and the info was returned. pass it on. - (begin ;; ("192.168.0.9" 53817 - ;; "5e34239f48e8973b3813221e54701a01" "24310" - ;; "192.168.0.9" - ;; "/home/matt/data/megatest/tests/simplerun" - ;; ".db/1.db") - (match - res - ((host port servkey pid ipaddr apath dbname) - (debug:print-info 0 *default-log-port* "got "res) - (hash-table-set! conns - fullname - (make-conndat - apath: apath - dbname: dbname - hostport: (conc host":"port) - socket: (open-nn-connection (conc host":"port)) - ipaddr: ipaddr - port: port - srvkey: servkey - lastmsg: (current-seconds) - expires: (+ (current-seconds) 60)))) - (else - (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) - res) - (begin - (debug:print-info 0 *default-log-port* "Unexpected result: " res) - res))))))) - - #t)) - -;;====================================================================== - -;; FOR DEBUGGING SET TO #t -;; (define *localmode* #t) -(define *localmode* #f) -(define *dbstruct* (make-dbr:dbstruct)) - -;; Defaults to current area -;; -(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) - ;; (if (not *remotedat*)(set! *remotedat* (make-remotedat))) - (let* ((apath *toppath*) - (remdat *remotedat*) - (conns (remotedat-conns remdat)) ;; just checking that remdat is a remotedat - (dbname (db:run-id->dbname rid))) - (if *localmode* - (let* ((dbdat (dbr:dbstruct-get-dbdat *dbstruct* dbname)) - (indat `((cmd . ,cmd)(params . ,params)))) - (api:process-request *dbstruct* indat) - ;; (api:process-request dbdat indat) - ) - (begin - (rmt:open-main-connection remdat apath) - (if rid (rmt:general-open-connection remdat apath dbname)) - (rmt:send-receive-real remdat apath dbname cmd params))))) - -#;(define (rmt:send-receive-setup conn) - (if (not (conndat-inport conn)) - (let-values (((i o) (tcp-connect (conndat-ipaddr conn) - (conndat-port conn)))) - (conndat-inport-set! conn i) - (conndat-outport-set! conn o)))) - -;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed -;; sometime in the future -;; -(define (rmt:send-receive-real remdat apath dbname cmd params) - (let* ((conn (rmt:get-conn remdat apath dbname))) - (assert conn "FATAL: rmt:send-receive-real called without the needed channels opened") - (assert (conndat-socket conn) "FATAL: rmt:send-receive-real called without the channel socket opened.") - (let* ((soc (conndat-socket conn)) - (key #f) - (host (conndat-ipaddr conn)) - (port (conndat-port conn)) - (payload `((cmd . ,cmd) - (key . ,(conndat-srvkey conn)) - (params . ,params))) - (res (send-receive-nn soc ;; (open-send-receive-nn (conc host":"port) - (sexpr->string payload)))) - (if (member res '("#")) ;; TODO - fix this in string->sexpr - #f - (string->sexpr res))))) - -;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed -;; sometime in the future. -;; -;; Purpose - call the main.db server and request a server be started -;; for the given area path and dbname -;; -;; (define (rmt:send-receive-server-start remdat apath dbname) -;; (let* ((conn (rmt:get-conn remdat apath dbname))) -;; (assert conn "FATAL: Unable to connect to db "apath"/"dbname) -;; #;(let* ((res (with-input-from-request -;; (conndat->uri conn "api") -;; `((params . (,apath ,dbname))) -;; read-string))) -;; (string->sexpr res)))) - -(define (rmt:print-db-stats) - (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" - (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================") - (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) - (for-each (lambda (cmd) - (let ((cmd-dat (hash-table-ref *db-stats* cmd))) - (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) - (sort (hash-table-keys *db-stats*) - (lambda (a b) - (> (vector-ref (hash-table-ref *db-stats* a) 0) - (vector-ref (hash-table-ref *db-stats* b) 0))))))) - -(define (rmt:get-max-query-average run-id) - (mutex-lock! *db-stats-mutex*) - (let* ((runkey (conc "run-id=" run-id " ")) - (cmds (filter (lambda (x) - (substring-index runkey x)) - (hash-table-keys *db-stats*))) - (res (if (null? cmds) - (cons 'none 0) - (let loop ((cmd (car cmds)) - (tal (cdr cmds)) - (max-cmd (car cmds)) - (res 0)) - (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) - (tot (vector-ref cmd-dat 0)) - (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction - (currmax (max res curravg)) - (newmax-cmd (if (> curravg res) cmd max-cmd))) - (if (null? tal) - (if (> tot 10) - (cons newmax-cmd currmax) - (cons 'none 0)) - (loop (car tal)(cdr tal) newmax-cmd currmax))))))) - (mutex-unlock! *db-stats-mutex*) - res)) - + + dbmgrmod + ulex + ) ;;====================================================================== ;; ;; A C T U A L A P I C A L L S ;; ;;====================================================================== -;;====================================================================== -;; S E R V E R -;;====================================================================== - -(define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server #f (list run-id))) - -(define (rmt:start-server run-id) - (rmt:send-receive 'start-server #f (list run-id))) - -(define (rmt:get-server-info apath dbname) - (rmt:send-receive 'get-server-info #f (list apath dbname))) - ;;====================================================================== ;; M I S C ;;====================================================================== (define (rmt:login run-id) @@ -1066,10 +721,11 @@ (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) (if (or (not smallest-time) (< last-update smallest-time)) (hash-table-set! smallest-last-update-time "smallest-time" last-update)) (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) #f))))))) + (define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) (let ((test-ht (hash-table-ref cached-info 'tests)) (data-ht (hash-table-ref cached-info 'data))) (for-each (lambda (test-data-id) @@ -1520,59 +1176,10 @@ (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) -;; host and port are used to ensure we are remove proper records -(define (rmt:server-shutdown host port) - (let ((dbfile (servdat-dbfile *server-info*))) - (debug:print-info 0 *default-log-port* "dbfile is "dbfile) - (if dbfile - (let* ((am-server (args:get-arg "-server")) - (dbfile (args:get-arg "-db")) - (apath *toppath*) - (remdat *remotedat*)) ;; foundation for future fix - (if *dbstruct-db* - (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) - (db (dbr:dbdat-db dbdat)) - (inmem (dbr:dbdat-db dbdat)) - ) - ;; do a final sync here - (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) - (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) - ;; let's finalize here - (debug:print-info 0 *default-log-port* "Finalizing db and inmem") - (if (sqlite3:database? db) - (sqlite3:finalize! db) - (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) - (if (sqlite3:database? inmem) - (sqlite3:finalize! inmem) - (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) - (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) - (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) - (if (not am-server) - (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") - (if (string-match ".*/main.db$" dbfile) - (let ((pkt-file (conc (get-pkts-dir *toppath*) - "/" (servdat-uuid *server-info*) - ".pkt"))) - (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) - (delete-file* pkt-file) - (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) - (db:with-lock-db - (servdat-dbfile *server-info*) - (lambda (dbh dbfile) - (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove - (let* ((sdat *server-info*) ;; we have a run-id server - (host (servdat-host sdat)) - (port (servdat-port sdat)) - (uuid (servdat-uuid sdat)) - (res (rmt:deregister-server remdat *toppath* host port uuid dbfile))) - (debug:print-info 0 *default-log-port* "deregistered-server, res="res) - (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) - ))))))) - (define (std-exit-procedure) ;;(common:telemetry-log-close) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) (let ((no-hurry (if (bdat-time-to-exit *bdat*) ;; hurry up @@ -1584,16 +1191,20 @@ (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds (let* ((start-time (current-seconds))) - (if (and *server-info* - *unclean-shutdown*) - (begin - (debug:print-info 0 *default-log-port* "Unclean server exit, calling server-shtudown") - (rmt:server-shutdown (servdat-host *server-info*) - (servdat-port *server-info*)))) + #;(if *db-serv-info* + (let* ((host (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*))) + (debug:print-info 0 *default-log-port* "Shutting down server/responder.") + ;; + ;; TODO - add flushing/waiting on the work queue + ;; + (rmt:server-shutdown host port) + (portlogger:open-run-close portlogger:set-port port "released"))) + (debug:print-info 0 *default-log-port* "Shutdown activities completed in "(- (current-seconds) start-time)" seconds")) ;; (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated #;(if (bdat-task-db *bdat*) ;; TODO: Check that this is correct for task db (let ((db (cdr (bdat-task-db *bdat*)))) (if (sqlite3:database? db) @@ -1623,27 +1234,21 @@ ) ) 0) - -(define (common:run-sync?) - ;; (and (common:on-homehost?) - (args:get-arg "-server")) - ;; called in megatest.scm, host-port is string hostname:port ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running ;; in the same process as the server. ;; ;; conn is a conndat record ;; -(define (server:ping conn #!key (do-exit #f)) - (let* ((req (conndat-socket conn)) - (srvkey (conndat-srvkey conn)) +#;(define (server:ping uconn #!key (do-exit #f)) + (let* ((srvkey (conndat-srvkey uconn)) (msg (sexpr->string '(ping ,srvkey)))) - (send-receive-nn req msg))) ;; (server-ready? host port server-id)) + (send-receive uconn 'ping msg))) ;; (server-ready? host port server-id)) ;;====================================================================== ;; http-transportmod.scm contents moved here ;;====================================================================== @@ -1654,124 +1259,15 @@ ;;====================================================================== ;; S E R V E R ;; ====================================================================== -(define (http-get-function fnkey) - (hash-table-ref/default *http-functions* fnkey (lambda () "nothing here yet"))) - -;; Main entry point to start a server. was start-server -(define (rmt:run hostn) - ;; ;; Configurations for server - ;; (tcp-buffer-size 2048) - ;; (max-connections 2048) - (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") - (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (port (portlogger:open-run-close portlogger:find-port)) - ;; (link-tree-path (common:get-linktree)) - ;; (tmp-area (common:get-db-tmp-area)) - #;(start-file (conc tmp-area "/.server-start"))) - (debug:print-info 0 *default-log-port* "portlogger recommended port: " port) - (if *server-info* - (begin - (servdat-host-set! *server-info* ipaddrstr) - (servdat-port-set! *server-info* port) - (servdat-status-set! *server-info* 'trying-port) - (servdat-trynum-set! *server-info* (+ (servdat-trynum *server-info*) 1))) - (set! *server-info* (make-servdat host: ipaddrstr port: port))) - (let* ((rep (rmt:try-start-server ipaddrstr port))) - (let loop ((instr (nng-recv rep))) - (let* ((data (string->sexpr instr)) - (res (case data - ((quit) 'quit) - (else (api:process-request *dbstruct-db* data)))) - (resdat (sexpr->string res))) - (if (not (eq? res 'quit)) - (begin - (set! *db-last-access* (current-seconds)) - (nng-send rep resdat) - (loop (nng-recv rep))))))) - (debug:print-info 0 *default-log-port* "After server, should never see this") - ;; server exit stuff here - (let* ((portnum (servdat-port *server-info*)) - (host (servdat-host *server-info*))) - (portlogger:open-run-close portlogger:set-port portnum "released") - (if (not (equal? (get-host-name) host)) - (debug:print-info 0 *default-log-port* "Server shutdown called for host "host", but we are on "(get-host-name)) - (rmt:server-shutdown host portnum)) - ;; (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up - (portlogger:open-run-close portlogger:set-port port "released") ;; done in rmt:run - ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - ;; (debug:print-info 0 *default-log-port* "Average cached write time " - ;; (if (eq? *number-of-writes* 0) - ;; "n/a (no writes)" - ;; (/ *writes-total-delay* - ;; *number-of-writes*)) - ;; " ms") - ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - ;; (debug:print-info 0 *default-log-port* "Average non-cached time " - ;; (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - ;; " ms") - - (db:print-current-query-stats) - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - ))) - -(define (rmt:try-start-server ipaddrstr portnum) - (if *server-info* ;; update the server info as we might be trying next port - (begin - (servdat-host-set! *server-info* ipaddrstr) - (servdat-port-set! *server-info* portnum) - (servdat-status-set! *server-info* 'trying-port) - (servdat-trynum-set! *server-info* - (+ (servdat-trynum *server-info*) 1))) - (set! *server-info* (make-servdat host: ipaddrstr port: portnum))) - (debug:print-info 0 *default-log-port* "rmt:try-start-server time=" - (seconds->time-string (current-seconds)) - " ipaddrsstr=" ipaddrstr - " portnum=" portnum) - (assert (servdat? *server-info*) "FATAL: Must always have *server-info* properly set up by here.") - (servdat-status-set! *server-info* 'starting) - (servdat-port-set! *server-info* portnum) - (if (not (servdat-rep *server-info*)) - (let ((rep (make-rep-socket))) - (servdat-rep-set! *server-info* rep) - (socket-set! rep 'nng/recvtimeo 2000))) - (let* ((rep (servdat-rep *server-info*))) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - ;; (thread-sleep! 0.1) - (rmt:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum)))) - (nng-listen rep (conc "tcp://*:" portnum)) - rep))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) + +;; (define (http-get-function fnkey) +;; (hash-table-ref/default *http-functions* fnkey (lambda () "nothing here yet"))) ;;====================================================================== ;; C L I E N T S ;;====================================================================== @@ -1814,722 +1310,13 @@ (define (rmt:inc-requests-and-prep-to-close-all-connections) (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) -;;====================================================================== -;; NEW SERVER METHOD -;;====================================================================== - -;; only use for main.db - need to re-write some of this :( -;; -(define (get-lock-db sdat dbfile host port) - (assert host "FATAL: get-lock-db called with host not set.") - (assert port "FATAL: get-lock-db called with port not set.") - (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations - (res (db:get-iam-server-lock dbh dbfile host port))) - ;; res => list then already locked, check server is responsive - ;; => #t then sucessfully got the lock - ;; => #f reserved for future use as to indicate something went wrong - (match res - ((owner_pid owner_host owner_port event_time) - (if (server-ready? owner_host owner_port "abc") - #f ;; locked by someone else - (begin ;; locked by someone dead and gone - (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") - (db:steal-lock-db dbh dbfile port)))) - (#t #t) ;; placeholder so that we don't touch res if it is #t - (else (set! res #f))) - (sqlite3:finalize! dbh) - res)) - - -(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) - (let* ((pkt-dat `((host . ,host) - (port . ,port) - (servkey . ,servkey) - (pid . ,(current-process-id)) - (ipaddr . ,ipaddr) - (dbpath . ,dbpath))) - (uuid (write-alist->pkt - pkts-dir - pkt-dat - pktspec: pkt-spec - ptype: 'server))) - (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) - uuid)) - -(define (get-pkts-dir #!optional (apath #f)) - (let* ((effective-toppath (or *toppath* apath))) - (assert effective-toppath - "ERROR: get-pkts-dir called without *toppath* set. Exiting.") - (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) - (if (file-exists? pdir) - pdir - (begin - (create-directory pdir #t) - pdir))))) - -;; given a pkts dir read -;; -(define (get-all-server-pkts pktsdir-in pktspec) - (let* ((pktsdir (if (file-exists? pktsdir-in) - pktsdir-in - (begin - (create-directory pktsdir-in #t) - pktsdir-in))) - (all-pkt-files (glob (conc pktsdir "/*.pkt")))) - (map (lambda (pkt-file) - (read-pkt->alist pkt-file pktspec: pktspec)) - all-pkt-files))) - -(define (server-address srv-pkt) - (conc (alist-ref 'host srv-pkt) ":" - (alist-ref 'port srv-pkt))) - -(define (server-ready? host port key) ;; server-address is host:port - (let* ((data (sexpr->string `((cmd . ping) - (key . ,key) - (params . ())))) - (res (open-send-receive-nn (conc host ":" port) data))) - (if res - (string->sexpr res) - res))) - -; from the pkts return servers associated with dbpath -;; NOTE: Only one can be alive - have to check on each -;; in the list of pkts returned -;; -(define (get-viable-servers serv-pkts dbpath) - (let loop ((tail serv-pkts) - (res '())) - (if (null? tail) - res ;; NOTE: sort by age so oldest is considered first - (let* ((spkt (car tail))) - (loop (cdr tail) - (if (equal? dbpath (alist-ref 'dbpath spkt)) - (cons spkt res) - res)))))) - -(define (remove-pkts-if-not-alive serv-pkts) - (filter (lambda (pkt) - (let* ((host (alist-ref 'host pkt)) - (port (alist-ref 'port pkt)) - (key (alist-ref 'servkey pkt)) - (pktz (alist-ref 'Z pkt)) - (res (server-ready? host port key))) - (if res - res - (let* ((pktsdir (get-pkts-dir *toppath*)) - (pktpath (conc pktsdir"/"pktz".pkt"))) - (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) - (delete-file* pktpath) - #f)))) - serv-pkts)) - -;; from viable servers get one that is alive and ready -;; -(define (get-the-server apath serv-pkts) - (let loop ((tail serv-pkts)) - (if (null? tail) - #f - (let* ((spkt (car tail)) - (host (alist-ref 'ipaddr spkt)) - (port (alist-ref 'port spkt)) - (dbpth (alist-ref 'dbpath spkt)) - (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt)) - (addr (server-address spkt))) - (if (server-ready? host port srvkey) - spkt - (loop (cdr tail))))))) - -;; am I the "first" in line server? I.e. my D card is smallest -;; use Z card as tie breaker -;; -(define (get-best-candidate serv-pkts dbpath) - (if (null? serv-pkts) - #f - (let loop ((tail serv-pkts) - (best (car serv-pkts))) - (if (null? tail) - best - (let* ((candidate (car tail)) - (candidate-bd (string->number (alist-ref 'D candidate))) - (best-bd (string->number (alist-ref 'D best))) - ;; bigger number is younger - (candidate-z (alist-ref 'Z candidate)) - (best-z (alist-ref 'Z best)) - (new-best (cond - ((> best-bd candidate-bd) ;; best is younger than candidate - candidate) - ((< best-bd candidate-bd) ;; candidate is younger than best - best) - (else - (if (string>=? best-z candidate-z) - best - candidate))))) ;; use Z card as tie breaker - (if (null? tail) - new-best - (loop (cdr tail) new-best))))))) - - -;;====================================================================== -;; END NEW SERVER METHOD -;;====================================================================== - -;; if .db/main.db check the pkts -;; -(define (rmt:wait-for-server pkts-dir db-file server-key) - (let* ((sdat *server-info*)) - (let loop ((start-time (current-seconds)) - (changed #t) - (last-sdat "not this")) - (begin ;; let ((sdat #f)) - (thread-sleep! 0.01) - (debug:print-info 0 *default-log-port* "Waiting for server alive signature") - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if (and sdat - (not changed) - (> (- (current-seconds) start-time) 2)) - (begin - (servdat-status-set! sdat 'iface-stable) - (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server") - ;; create a server pkt in *toppath*/.meta/srvpkts - - ;; TODO: - ;; 1. change sdat to stuct - ;; 2. add uuid to struct - ;; 3. update uuid in sdat here - ;; - (servdat-uuid-set! sdat - (register-server - pkts-dir *srvpktspec* - (get-host-name) - (servdat-port sdat) server-key - (servdat-host sdat) db-file)) - ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key - ;; now read pkts and see if we are a contender - (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) - (viables (get-viable-servers all-pkts db-file)) - (alive (remove-pkts-if-not-alive viables)) - (best-srv (get-best-candidate alive db-file)) - (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) - (i-am-srv (equal? best-srv-key server-key)) - (delete-pkt (lambda () - (let* ((pktfile (conc (get-pkts-dir *toppath*) - "/" (servdat-uuid *server-info*) - ".pkt"))) - (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) - (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit - (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) - ;; am I the best-srv, compare server-keys to know - (if i-am-srv - (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) - (begin - (debug:print-info 0 *default-log-port* "I'm the server!") - (servdat-dbfile-set! sdat db-file) - (servdat-status-set! sdat 'db-locked)) - (begin - (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") - (bdat-time-to-exit-set! *bdat* #t) - (delete-pkt) - (thread-sleep! 0.2) - (exit))) - (begin - (debug:print-info 0 *default-log-port* - "Keys do not match "best-srv-key", "server-key", exiting.") - (bdat-time-to-exit-set! *bdat* #t) - (delete-pkt) - (thread-sleep! 0.2) - (exit))) - sdat)) - (begin ;; sdat not yet contains server info - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (begin - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (exit)) - (loop start-time - (equal? sdat last-sdat) - sdat)))))))) - -(define (rmt:register-server remdat apath iface port server-key dbname) - (remotedat-conns remdat) ;; just checking types - (rmt:open-main-connection remdat apath) ;; we need a channel to main.db - (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath - (db:run-id->dbname #f) - 'register-server `(,iface - ,port - ,server-key - ,(current-process-id) - ,iface - ,apath - ,dbname))) - -(define (rmt:get-count-servers remdat apath) - (remotedat-conns remdat) ;; just checking types - (rmt:open-main-connection remdat apath) ;; we need a channel to main.db - (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath - (db:run-id->dbname #f) - 'get-count-servers `(,apath))) - -(define (rmt:get-servers-info apath) - (rmt:send-receive 'get-servers-info #f `(,apath))) - -(define (rmt:deregister-server remdat apath iface port server-key dbname) - (remotedat-conns remdat) ;; just checking types - (rmt:open-main-connection remdat apath) ;; we need a channel to main.db - (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath - (db:run-id->dbname #f) - 'deregister-server `(,iface - ,port - ,server-key - ,(current-process-id) - ,iface - ,apath - ,dbname))) - -(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) - ;; wait until *server-info* stops changing - (let* ((stime (current-seconds))) - (let loop ((last-host #f) - (last-port #f) - (tries 0)) - (let* ((curr-host (and *server-info* (servdat-host *server-info*))) - (curr-port (and *server-info* (servdat-port *server-info*)))) - ;; first we verify port and interface, update *server-info* in need be. - (cond - ((> tries num-tries-allowed) - (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") - (exit 1)) - ((not *server-info*) - (thread-sleep! 0.25) - (loop curr-host curr-port (+ tries 1))) - ((or (not last-host)(not last-port)) - (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) - (thread-sleep! 0.25) - (loop curr-host curr-port (+ tries 1))) - ((or (not (equal? last-host curr-host)) - (not (equal? last-port curr-port))) - (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") - (thread-sleep! 0.25) - (loop curr-host curr-port (+ tries 1))) - ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed - (thread-sleep! 0.5) - (loop curr-host curr-port (+ tries 1))) - (else - (rmt:get-signature) ;; sets *my-signature* as side effect - (servdat-status-set! *server-info* 'interface-stable) - (debug:print 0 *default-log-port* - "SERVER STARTED: " curr-host - ":" curr-port - " AT " (current-seconds) " server signature: " *my-signature* - " with "(servdat-trynum *server-info*)" port changes") - (flush-output *default-log-port*) - #t)))))) - -;; run rmt:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (rmt:keep-running dbname) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - - (let* ((remdat *remotedat*) - (server-start-time (current-seconds)) - (pkts-dir (get-pkts-dir)) - (server-key (rmt:get-signature)) ;; This servers key - (is-main (equal? (args:get-arg "-db") ".db/main.db")) - (last-access 0) - (server-timeout (server:expiration-timeout)) - (shutdown-server-sequence (lambda (host port) - (set! *unclean-shutdown* #f) - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - (rmt:server-shutdown host port) - (portlogger:open-run-close portlogger:set-port port "released") - (exit))) - (timed-out? (lambda () - (<= (+ last-access server-timeout) - (current-seconds))))) - (servdat-dbfile-set! *server-info* (args:get-arg "-db")) - ;; main and run db servers have both got wait logic (could/should merge it) - (if is-main - (rmt:wait-for-server pkts-dir dbname server-key) - (rmt:wait-for-stable-interface)) - ;; this is our forever loop - (let* ((iface (servdat-host *server-info*)) - (port (servdat-port *server-info*))) - (let loop ((count 0) - (bad-sync-count 0) - (start-time (current-milliseconds))) - (if (and (not is-main) - (common:low-noise-print 60 "servdat-status")) - (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *server-info*))) - - ;; set up the database handle - (mutex-lock! *heartbeat-mutex*) - (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate - (let ((watchdog (bdat-watchdog *bdat*))) - (debug:print 0 *default-log-port* "SERVER: dbprep") - (db:setup dbname) ;; sets *dbstruct-db* as side effect - (servdat-status-set! *server-info* 'db-opened) - ;; IFF I'm not main, call into main and register self - (if (not is-main) - (let ((res (rmt:register-server remdat - *toppath* iface port - server-key dbname))) - (if res ;; we are the server - (servdat-status-set! *server-info* 'have-interface-and-db) - ;; now check that the db locker is alive, clear it out if not - (let* ((serv-info (rmt:get-server-info *toppath* dbname))) - (match serv-info - ((host port servkey pid ipaddr apath dbpath) - (if (not (server-ready? host port servkey)) - (begin - (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") - (rmt:deregister-server remdat apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) - - (loop (+ count 1) bad-sync-count start-time)))) - (else - (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) - (exit))))))) - (debug:print 0 *default-log-port* - "SERVER: running, db "dbname" opened, megatest version: " - (common:get-full-version)) - ;; start the watchdog - - ;; is this really needed? - - #;(if watchdog - (if (not (member (thread-state watchdog) - '(ready running blocked - sleeping dead))) - (begin - (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")") - (thread-start! watchdog)) - (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")")) - (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it.")) - #;(loop (+ count 1) bad-sync-count start-time) - )) - - (debug:print-info 0 *default-log-port* "syncing "*toppath*" "dbname" at "(current-seconds)) - (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) - - (mutex-unlock! *heartbeat-mutex*) - - ;; when things go wrong we don't want to be doing the various - ;; queries too often so we strive to run this stuff only every - ;; four seconds or so. - (let* ((sync-time (- (current-milliseconds) start-time)) - (rem-time (quotient (- 4000 sync-time) 1000))) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time))) - - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) bad-sync-count (current-milliseconds))) - - ;; Transfer *db-last-access* to last-access to use in checking that we are still alive - (set! last-access *db-last-access*) - - (if (common:low-noise-print 60 "dbstats") - (begin - (debug:print 0 *default-log-port* "Server stats:") - (db:print-current-query-stats))) - (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) - (cond - ((not *server-run*) - (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") - (shutdown-server-sequence (get-host-name) port)) - ((timed-out?) - (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (shutdown-server-sequence (get-host-name) port)) - ((and *server-run* - (or (not (timed-out?)) - (if is-main ;; do not exit if there are other servers (keep main open until all others gone) - (> (rmt:get-count-servers remdat *toppath*) 1) - #f))) - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) - (loop 0 bad-sync-count (current-milliseconds))) - (else - (set! *unclean-shutdown* #f) - (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (shutdown-server-sequence (get-host-name) port) - #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " - (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown - (sexpr->string 'quit))) - ))))))) - -;; Call this to start the actual server -;; -;; all routes though here end in exit ... -;; -;; This is the point at which servers are started -;; -(define (rmt:server-launch dbname) - (debug:print-info 0 *default-log-port* "Entered rmt:server-launch") - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (rmt:run (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (rmt:keep-running dbname) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (thread-join! th3) - ;; (exit)) - ) - #f - ) - -;; Generate a unique signature for this process, used at both client and -;; server side -(define (rmt:mk-signature) - (message-digest-string (md5-primitive) - (with-output-to-string - (lambda () - (write (list (current-directory) - (current-process-id) - (argv))))))) - -(define (rmt:get-signature) - (if *my-signature* *my-signature* - (let ((sig (rmt:mk-signature))) - (set! *my-signature* sig) - *my-signature*))) - -;;====================================================================== -;; Nanomsg transport -;;====================================================================== - -(define (is-port-in-use port-num) - (let* ((ret #f)) - (let-values (((inp oup pid) - (process "netstat" (list "-tulpn" )))) - (let loop ((inl (read-line inp))) - (if (not (eof-object? inl)) - (begin - (if (string-search (regexp (conc ":" port-num)) inl) - (begin - ;(print "Output: " inl) - (set! ret #t)) - (loop (read-line inp))))))) - ret)) - -;;start a server, returns the connection -;; -(define (start-nn-server portnum ) - (let ((rep (make-rep-socket))) ;; (nn-socket 'rep))) - (socket-set! rep 'nng/recvtimeo 2000) - (handle-exceptions ;; why have exception handler here? - exn - (let ((emsg ((condition-property-accessor 'exn 'message) exn))) - (print "ERROR: Failed to start server \"" emsg "\"") - (exit 1)) - - (nng-dial #;nn-bind rep (conc "tcp://*:" portnum))) - rep)) - -(define (open-nn-connection host-port) - (let ((req (make-req-socket)) - (uri (conc "tcp://" host-port))) - (nng-dial req uri) - (socket-set! req 'nng/recvtimeo 2000) - req)) - -(define (send-receive-nn req msg) - (nng-send req msg) - (nng-recv req)) - -(define (close-nn-connection req) - (nng-close! req)) - -;; ;; open connection to server, send message, close connection -;; ;; -;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds -;; (let ((req (make-req-socket 'req)) -;; (uri (conc "tcp://" host-port)) -;; (res #f) -;; ;; (contacts (alist-ref 'contact attrib)) -;; ;; (mode (alist-ref 'mode attrib)) -;; ) -;; (socket-set! req 'nng/recvtimeo 2000) -;; (handle-exceptions -;; exn -;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) -;; ;; Send notification -;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) -;; #f) -;; (nng-dial req uri) -;; ;; (print "Connected to the server " ) -;; (nng-send req msg) -;; ;; (print "Request Sent") -;; (let* ((th1 (make-thread (lambda () -;; (let ((resp (nng-recv req))) -;; (nng-close! req) -;; (set! res (if (equal? resp "ok") -;; #t -;; #f)))) -;; "recv thread")) -;; (th2 (make-thread (lambda () -;; (thread-sleep! timeout) -;; (thread-terminate! th1)) -;; "timer thread"))) -;; (thread-start! th1) -;; (thread-start! th2) -;; (thread-join! th1) -;; res)))) -;; -(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds - (let ((req (make-req-socket)) - (uri (conc "tcp://" host-port)) - (res #f)) - (handle-exceptions - exn - (let ((emsg ((condition-property-accessor 'exn 'message) exn))) - ;; Send notification - (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn) - #f) - (nng-dial req uri) - (nng-send req msg) - (let* ((th1 (make-thread (lambda () - (let ((resp (nng-recv req))) - (nng-close! req) - ;; (print resp) - (set! res resp))) - "recv thread")) - (th2 (make-thread (lambda () - (thread-sleep! timeout) - (thread-terminate! th1)) - "timer thread"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - res)))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;; run ping in separate process, safest way in some cases -;; -#;(define (server:ping-server ifaceport) - (with-input-from-pipe - (conc (common:get-megatest-exe) " -ping " ifaceport) - (lambda () - (let loop ((inl (read-line)) - (res "NOREPLY")) - (if (eof-object? inl) - (case (string->symbol res) - ((NOREPLY) #f) - ((LOGIN_OK) #t) - (else #f)) - (loop (read-line) inl)))))) - -;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). -;; -#;(define (server:login toppath) - (lambda (toppath) - (set! *db-last-access* (current-seconds)) ;; might not be needed. - (if (equal? *toppath* toppath) - #t - #f))) - -;; (define server:sync-lock-token "SERVER_SYNC_LOCK") -;; (define (server:release-sync-lock) -;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -;; (define (server:have-sync-lock?) -;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) -;; (have-lock? (car have-lock-pair)) -;; (lock-time (cdr have-lock-pair)) -;; (lock-age (- (current-seconds) lock-time))) -;; (cond -;; (have-lock? #t) -;; ((>lock-age -;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) -;; (server:release-sync-lock) -;; (server:have-sync-lock?)) -;; (else #f)))) - ) ;;====================================================================== ;; A T T I C ;;====================================================================== - - ;; (handle-directory spiffy-directory-listing) -;; #;(handle-exception (lambda (exn chain) -;; (signal (make-composite-condition -;; (make-property-condition -;; 'server -;; 'message "server error"))))) -;; -;; ;; Setup the web server and a /ctrl interface -;; ;; -;; (vhost-map `(((* any) . ,(lambda (continue) -;; ;; open the db on the first call -;; ;; This is were we set up the database connections -;; (let* (($ (request-vars source: 'both)) -;; ;; (dat ($ 'dat)) -;; (res #f)) -;; (cond -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "api")) -;; (debug:print 0 *default-log-port* "In api request $=" $) -;; (send-response ;; the $ is the request vars proc -;; body: (http-handle-api *dbstruct-db* $) -;; headers: '((content-type text/plain))) -;; (set! *db-last-access* (current-seconds))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "ping")) -;; (send-response body: (conc *toppath*"/"(args:get-arg "-db")) -;; headers: '((content-type text/plain)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "loop-test")) -;; (send-response body: (alist-ref 'data ($)) -;; headers: '((content-type text/plain)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "")) -;; (send-response body: ((http-get-function 'rmt:main-page)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "json_api")) -;; (send-response body: ((http-get-function 'rmt:main-page)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "runs")) -;; (send-response body: ((http-get-function 'rmt:main-page)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ any)) -;; (send-response body: "hey there!\n" -;; headers: '((content-type text/plain)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "hey")) -;; (send-response body: "hey there!\n" -;; headers: '((content-type text/plain)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "jquery3.1.0.js")) -;; (send-response body: ((http-get-function 'rmt:show-jquery)) -;; headers: '((content-type application/javascript)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "test_log")) -;; (send-response body: ((http-get-function 'rmt:html-test-log) $) -;; headers: '((content-type text/HTML)))) -;; ((equal? (uri-path (request-uri (current-request))) -;; '(/ "dashboard")) -;; (send-response body: ((http-get-function 'rmt:html-dboard) $) -;; headers: '((content-type text/HTML)))) -;; (else (continue)))))))) Index: runsmod.scm ================================================================== --- runsmod.scm +++ runsmod.scm @@ -244,10 +244,24 @@ (begin (hash-table-set! *runs:denoise* key currtime) #t) #f))) +(define *too-soon-delays* (make-hash-table)) +(define *last-test-launch* 0) + +;; to-soon delay, when matching event happened in less than dseconds delay wseconds +;; +(define (runs:too-soon-delay key dseconds wseconds) + (let* ((last-time (hash-table-ref/default *too-soon-delays* key #f))) + (if (and last-time + (< (- (current-seconds) last-time) dseconds)) + (begin + (debug:print-info 4 *default-log-port* "Whoa, slow down there ... "key" has been too recently seen.") + (thread-sleep! wseconds))) + (hash-table-set! *too-soon-delays* key (current-seconds)))) + (define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) @@ -364,11 +378,11 @@ (allowed-tests #f) (runconf #f)) ;; check if readonly (when readonly-mode - (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.") + (debug:print-error 0 *default-log-port* *toppath* ".db/main.db is readonly. Cannot proceed.") (exit 1)) ;; per user request. If less than 100Meg space on dbdir partition, bail out with error ;; this will reduce issues in database corruption (common:check-db-dir-and-exit-if-insufficient) @@ -499,11 +513,11 @@ ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) ;; mark all test launched flag as false in the meta table - (rmt:set-var run-id (conc "lunch-complete-" run-id) "no") + (rmt:set-var run-id (conc "launch-complete-" run-id) "no") (debug:print-info 1 *default-log-port* "Setting end-of-run to no") (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (config-rerun-cnt (if config-reruns config-reruns @@ -1125,10 +1139,11 @@ ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry runsdat testdat) + (set! *last-test-launch* (current-seconds)) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (or (not (null? tal))(not (null? reg))) @@ -1467,11 +1482,15 @@ newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) - + + (if (> (- (current-seconds) *last-test-launch*) 5) ;; be pretty aggressive for five seconds after + (runs:too-soon-delay (conc "loop delay " hed) 1 0.6) ;; starting a test then apply more delay + (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) + (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) @@ -1494,10 +1513,11 @@ (if (or (not (null? tal))(not (null? reg))) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) + ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) (debug:print 4 *default-log-port* "TOP OF LOOP => " "test-name: " test-name @@ -1709,16 +1729,16 @@ (else (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched - (rmt:set-var run-id (conc "lunch-complete-" run-id) "yes") + (rmt:set-var run-id (conc "launch-complete-" run-id) "yes") ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle + (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") @@ -1725,10 +1745,11 @@ (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) (> num-running 0)) (begin ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) + (thread-sleep! 5) ;; let's always sleep, prevents abutting calls to rum:get-count-tests-running-for-run-id - didn't help (if (> (current-seconds)(+ last-time-incomplete 900)) (let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id))) (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set! @@ -1735,11 +1756,10 @@ (runs:find-and-mark-incomplete-and-check-end-of-run run-id #f) (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))) ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) - (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1)) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. @@ -2556,11 +2576,12 @@ ;; Extract out stuff needed in most or many calls ;; here then call proc (let* ((keyvals (keys:target->keyval keys target))) (proc target runname keys keyvals))) ;; (if db (sqlite3:finalize! db)) - (set! *didsomething* #t)))))) + (set! *didsomething* #t) + ))))) ;;====================================================================== ;; Lock/unlock runs ;;====================================================================== ADDED rununit.sh Index: rununit.sh ================================================================== --- /dev/null +++ rununit.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +rm tests/*log tests/simplerun/logs/* + +script -c 'ck5 make unit' Index: serialize-env.scm ================================================================== --- serialize-env.scm +++ serialize-env.scm @@ -1,6 +1,7 @@ -(module serialize-env + +(module serialize-envmod * (import scheme z3 base64 chicken.port @@ -13,9 +14,9 @@ (let* ((env-str (with-output-to-string (lambda () (pp (get-environment-variables))))) (zipped-env-str (z3:encode-buffer env-str)) (b64-env-str (base64-encode zipped-env-str))) (print b64-env-str)) ) -) -(import serialize-env) + (gen-output) +) Index: tests/simplerun/Makefile ================================================================== --- tests/simplerun/Makefile +++ tests/simplerun/Makefile @@ -1,3 +1,5 @@ cleanup : - killall mtest -v -9;rm -rf .meta .db + killall mtest dboard -v -9 || true + rm -rf *.log *.bak NB* logs/* .meta .db ../simpleruns/* lt + ADDED tests/simplerun/debug.scm Index: tests/simplerun/debug.scm ================================================================== --- /dev/null +++ tests/simplerun/debug.scm @@ -0,0 +1,61 @@ + +(module junk + * + +(import big-chicken + rmtmod + apimod + dbmod + srfi-18 + trace) + +(trace-call-sites #t) +(trace + ;; db:get-tests-for-run + ;; rmt:general-open-connection + ;; rmt:open-main-connection + ;; rmt:drop-conn + ;; rmt:send-receive + ;; rmt:log-to-main + ) + +(define (make-run-id) + (let* ((s (conc (current-process-id))) + (l (string-length s))) + (string->number (substring s (- l 3) l)) + )) + +(define (run) + (let* ((th1 (make-thread + (lambda () + (let loop ((r 0) + (i 1) + (s 0)) ;; sum + (let ((start-time (current-milliseconds)) + (run-id (+ r (make-run-id)))) + (rmt:register-test run-id "test1" (conc "item_" i)) + (thread-sleep! 0.01) + (let* ((qry-time (- (current-milliseconds) start-time)) + (tot-query-time (+ qry-time s)) + (avg-query-time (* 1.0 (/ tot-query-time (max i 1))))) + (if (> qry-time 500) + (print "WARNING: rmt:register-test took more than 500ms, "qry-time"ms, i="i", avg-query-time="avg-query-time)) + (if (eq? (modulo i 100) 0) + (print "For run-id="run-id", "(rmt:get-keys-write)" num tests registered="i" avg-query-time="avg-query-time)) + (if (< i 500) + (loop r (+ i 1) tot-query-time) + (if (< r 100) + (let* ((start-time (current-milliseconds))) + (print "rmt:get-keys "(rmt:get-keys)" in "(- (current-milliseconds) start-time)) + ;; run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode + (print "Got "(length (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f 0 #f))" tests for run "run-id) + (print "Average query time: "avg-query-time) + (loop (+ r 1) 0 tot-query-time)))))))) + ))) + (thread-start! th1) + (thread-join! th1))) + +(run) +) + + Index: tests/simplerun/megatest.config ================================================================== --- tests/simplerun/megatest.config +++ tests/simplerun/megatest.config @@ -21,10 +21,13 @@ [setup] # Adjust max_concurrent_jobs to limit how much you load your machines max_concurrent_jobs 50 +[server] +timeout 3600 + # Uncomment this to make the in-mem db into a disk based db (slower but good for debug) # be aware that some unit tests will fail with this due to persistent data # # tmpdb /tmp @@ -35,15 +38,15 @@ [validvalues] state start end completed # Job tools are more advanced ways to control how your jobs are launched [jobtools] -useshell yes +# useshell yes launcher nbfake # You can override environment variables for all your tests here [env-override] EXAMPLE_VAR example value # As you run more tests you may need to add additional disks, the names are arbitrary but must be unique [disks] disk0 #{getenv MT_RUN_AREA_HOME}/../simpleruns ADDED tests/simplerun/simple.scm Index: tests/simplerun/simple.scm ================================================================== --- /dev/null +++ tests/simplerun/simple.scm @@ -0,0 +1,2 @@ +(print (rmt:get-keys)) + Index: tests/simplerun/tests/test1/testconfig ================================================================== --- tests/simplerun/tests/test1/testconfig +++ tests/simplerun/tests/test1/testconfig @@ -24,11 +24,11 @@ [requirements] # waiton setup priority 0 # Iteration for your tests are controlled by the items section -[items] +# [items] # PARTOFDAY morning noon afternoon evening night # test_meta is a section for storing additional data on your test [test_meta] author matt Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -22,13 +22,17 @@ chicken.string chicken.process-context chicken.file chicken.pretty-print commonmod + ulex ) (define test-work-dir (current-directory)) + +(work-method 'mailbox) ;; threads, direct, mailbox +(return-method 'mailbox) ;; polling, mailbox, direct ;; given list of lists ;; ( ( msg expected param1 param2 ...) ;; ( ... ) ) ;; apply test to all @@ -49,12 +53,22 @@ ;; (for-each ;; (lambda (file) ;; (print "Loading " file) ;; (load file)) ;; files)) + +(define-syntax run-in-thread + (syntax-rules () + ((_ body ...) + (let ((th1 (make-thread (lambda () + body ...) + "the thread"))) + (thread-start! th1) + (thread-join! th1))))) + (let* ((unit-test-name (list-ref (argv) 4)) (fname (conc "../unittests/" unit-test-name ".scm"))) (if (file-exists? fname) (load fname) (print "ERROR: Unit test " unit-test-name " not found in unittests directory"))) Index: tests/unittests/all-rmt.scm ================================================================== --- tests/unittests/all-rmt.scm +++ tests/unittests/all-rmt.scm @@ -91,13 +91,15 @@ ;; (list (list "localhost" #t (get-host-name)) ;; (list "not-a-host" #t "not-a-host" )) ;; post-proc: pair?) ;; ;; (test #f #t (list? (rmt:get-changed-record-ids 0))) -;; -(test #f #f (begin (runs:update-all-test_meta #f) #f)) - +;; + +(run-in-thread + + ;; (test #f #f (begin (runs:update-all-test_meta #f) #f)) (test #f '("test1" "test2")(sort (alist-ref "tagtwo" (rmt:get-tests-tags) equal?) string<=?)) (test #f '() (rmt:get-key-val-pairs 0)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start (test #f '() (rmt:get-key-vals 1)) @@ -146,11 +148,11 @@ (test #f '()(rmt:get-prev-run-ids 1)) (test #f #t (begin (rmt:lock/unlock-run 1 #t #f "mikey") #t)) (test #f "JUSTFINE" (rmt:get-run-status 1)) (test #f #t (begin (rmt:set-run-status 1 "NOTFINE" msg: "A message") #t)) (test #f #t (begin (rmt:update-run-event_time 1) #t)) - +) ;; (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default ;; (let ((keys (rmt:get-keys)) (rnp "%") ;; run name patt (tpt "%/%")) ;; target patt Index: tests/unittests/basicserver.scm ================================================================== --- tests/unittests/basicserver.scm +++ tests/unittests/basicserver.scm @@ -21,21 +21,23 @@ ;; Run like this: ;; ;; ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) (import rmtmod trace http-client apimod dbmod - launchmod srfi-69) + launchmod srfi-69 ulex system-information) (trace-call-sites #t) (trace + ;; get-the-server ;; db:get-dbdat ;; rmt:find-main-server ;; rmt:send-receive-real ;; rmt:send-receive ;; sexpr->string ;; server-ready? ;; rmt:register-server + ;; api:run-server-process ;; rmt:open-main-connection ;; rmt:general-open-connection ;; rmt:get-conny ;; common:watchdog ;; rmt:find-main-server @@ -43,38 +45,43 @@ ;; get-viable-servers ;; get-best-candidate ;; api:run-server-process ;; rmt:run ;; rmt:try-start-server - ) - -(test #f #t (remotedat? (let ((r (make-remotedat))) - (set! *remotedat* r) - r))) -(test #f #f (rmt:get-conn *remotedat* *toppath* ".db/main.db")) -(test #f #f (rmt:find-main-server *toppath* ".db/main.db")) -(test #f #t (rmt:open-main-connection *remotedat* *toppath*)) -(pp (hash-table->alist (remotedat-conns *remotedat*))) -(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) - -(define *main* (rmt:get-conn *remotedat* *toppath* ".db/main.db")) - -;; (for-each (lambda (tdat) -;; (test #f tdat (loop-test (rmt:conn-ipaddr *main*) -;; (rmt:conn-port *main*) tdat))) -;; (list 'a -;; '(a "b" 123 1.23 ))) -(test #f #t (rmt:send-receive 'ping #f 'hello)) - -(define *db* (db:setup ".db/main.db")) - -;; these let me cut and paste from source easily -(define apath *toppath*) -(define dbname ".db/2.db") -(define remote *remotedat*) + ;; + ;; ulex + ;; + ;; wait-and-close + ;; run-listener + ) + + +(test #f #t (servdat? (let ((s (make-servdat))) + (set! *servdat* s) + s))) +(test #f #f (rmt:get-conn *servdat* *toppath* ".db/main.db")) +(test #f #f (rmt:find-main-server *servdat* *toppath* ".db/main.db")) +(define th1 (make-thread (lambda () + (rmt:run (get-host-name))) + "rmt:run thread")) +(thread-start! th1) +(thread-sleep! 0.5) ;; give things some time to get going +;; switch to *db-serv-info* instead of *servdat* +(define *uconn* (servdat-uconn *db-serv-info*)) +(print "*uconn*: " *uconn*) +(test #f #t (ulex-listener? (servdat-uconn *db-serv-info*))) +(test #f #t (string? (udat-host-port *uconn*))) + +(run-in-thread + (test #f 'ack (server-ready? *uconn* (udat-host-port *uconn*) (servdat-uuid *db-serv-info*)))) + +(test #f #t (rmt:open-main-connection *db-serv-info* *toppath*)) +;; (pp (hash-table->alist (remotedat-conns *db-serv-info*))) +(test #f #t (conndat? (rmt:get-conn *db-serv-info* *toppath* ".db/main.db"))) + +(define remote *db-serv-info*) (define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) -(test #f '() (string->sexpr "()")) -(test #f 'server-started (api:execute-requests *db* 'get-server (list *toppath* ".db/2.db"))) -(set! *dbstruct-db* #f) +(run-in-thread + (test #f (map car keyvals) (rmt:get-keys))) (exit) Index: tests/unittests/server.scm ================================================================== --- tests/unittests/server.scm +++ tests/unittests/server.scm @@ -53,21 +53,21 @@ (define *db* (db:setup ".db/main.db")) ;; these let me cut and paste from source easily (define apath *toppath*) (define dbname ".db/2.db") -(define remote *remotedat*) +(define remote *db-serv-info*) (define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) (test #f #t (rmt:open-main-connection remote apath)) -(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) -(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) -(test #f ".db/2.db" (list-ref (rmt:send-receive-real *remotedat* *toppath* ".db/main.db" 'get-server `(,apath ,dbname)) - 6)) +(test #f #t (conndat? (rmt:get-conn *db-serv-info* *toppath* ".db/main.db"))) +(test #f #t (conndat? (rmt:get-conn *db-serv-info* *toppath* ".db/main.db"))) +(test #f 'server-started (rmt:send-receive-real *db-serv-info* *toppath* ".db/main.db" + 'start-server `(,apath ,dbname))) (thread-sleep! 2) -(test #f #t (rmt:general-open-connection *remotedat* *toppath* ".db/2.db")) +(test #f #t (rmt:general-open-connection *db-serv-info* *toppath* ".db/2.db")) ;; (let loop ((end-time (+ (current-seconds) 61))) (test #f #t (list? (rmt:get-servers-info *toppath*))) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) @@ -75,16 +75,16 @@ ;; (print "Got here.") (test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) (test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) - ;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname + ;; (test #f 2 (rmt:deregister-server *db-serv-info* *toppath* iface port server-key dbname - (test #f 2 (rmt:get-count-servers *remotedat* *toppath*)) + (test #f 2 (rmt:get-count-servers *db-serv-info* *toppath*)) (test #f "run2" (rmt:get-run-name-from-id 2)) (test #f #f (rmt:send-receive 'get-test-info-by-id 2 '(2 1))) (test #f #t (rmt:general-call 'update-cpuload-diskfree 2 1.5 1e6 1)) ;; (if (< (current-seconds) end-time)(loop end-time))) (exit) ADDED ulex-dual/Makefile Index: ulex-dual/Makefile ================================================================== --- /dev/null +++ ulex-dual/Makefile @@ -0,0 +1,9 @@ +all : ulex.pdf ulex.png + +ulex.pdf : ulex.dot + dot -Tpdf ulex.dot -o ulex.pdf + +ulex.png : ulex.dot + dot -Tpng ulex.dot -o ulex.png + + ADDED ulex-dual/dbmgr.scm Index: ulex-dual/dbmgr.scm ================================================================== --- /dev/null +++ ulex-dual/dbmgr.scm @@ -0,0 +1,1003 @@ +;;====================================================================== +;; Copyright 2022, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbmgrmod)) + +(declare (uses ulex)) +(declare (uses apimod)) +(declare (uses pkts)) +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses portloggermod)) +(declare (uses debugprint)) + +(module dbmgrmod + * + +(import scheme + chicken.base + chicken.condition + chicken.file + chicken.format + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + + (prefix sqlite3 sqlite3:) + matchable + md5 + message-digest + regex + s11n + srfi-1 + srfi-18 + srfi-69 + system-information + typed-records + + pkts + ulex + + commonmod + apimod + dbmod + debugprint + (prefix mtargs args:) + portloggermod + ) + +;; Configurations for server +;; (tcp-buffer-size 2048) +;; (max-connections 2048) + +;; info about me as a listener and my connections to db servers +;; stored (for now) in *db-serv-info* +;; +(defstruct servdat + (host #f) + (port #f) + (uuid #f) + (dbfile #f) + (uconn #f) ;; this is the listener for this process + (mode #f) + (status 'starting) + (trynum 0) ;; count the number of ports we've tried + (conns (make-hash-table)) ;; apath/dbname => conndat + ) + +(define *db-serv-info* #f) + +(define (servdat->url sdat) + (conc (servdat-host sdat)":"(servdat-port sdat))) + +;; db servers contact info +;; +(defstruct conndat + (apath #f) + (dbname #f) + (fullname #f) + (hostport #f) + (ipaddr #f) + (port #f) + (srvpkt #f) + (srvkey #f) + (lastmsg 0) + (expires 0)) + +(define *srvpktspec* + `((server (host . h) + (port . p) + (servkey . k) + (pid . i) + (ipaddr . a) + (dbpath . d)))) + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +;; set up the api proc, seems like there should be a better place for this? +;; +;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE +;; +;; (define api-proc (make-parameter conc)) +;; (api-proc api:execute-requests) + +;; do we have a connection to apath dbname and +;; is it not expired? then return it +;; +;; else setup a connection +;; +;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception +;; +(define (rmt:get-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-ref/default (servdat-conns remdat) fullname #f))) + +(define (rmt:drop-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-delete! (servdat-conns remdat) fullname))) + +(define (rmt:find-main-server uconn apath dbname) + (let* ((pktsdir (get-pkts-dir apath)) + (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) + (viable-srvs (get-viable-servers all-srvpkts dbname))) + (get-the-server uconn apath viable-srvs))) + + +(define *connstart-mutex* (make-mutex)) +(define *last-main-start* 0) + +;; looks for a connection to main, returns if have and not exired +;; creates new otherwise +;; +;; connections for other servers happens by requesting from main +;; +;; TODO: This is unnecessarily re-creating the record in the hash table +;; +(define (rmt:open-main-connection remdat apath) + (let* ((fullpath (db:dbname->path apath ".db/main.db")) + (conns (servdat-conns remdat)) + (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this + (myconn (servdat-uconn remdat))) + (cond + ((not myconn) + (servdat-uconn-set! remdat (make-udat)) + (rmt:open-main-connection remdat apath)) + ((and conn ;; conn is NOT a socket, just saying ... + (< (current-seconds) (conndat-expires conn))) + #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died + ((and conn + (>= (current-seconds)(conndat-expires conn))) + (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") + (rmt:drop-conn remdat apath ".db/main.db") ;; + (rmt:open-main-connection remdat apath)) + (else + ;; Below we will find or create and connect to main + (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch") + (let* ((dbname (db:run-id->dbname #f)) + (the-srv (rmt:find-main-server myconn apath dbname)) + (start-main-srv (lambda () ;; call IF there is no the-srv found + (mutex-lock! *connstart-mutex*) + (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server + (begin + (api:run-server-process apath dbname) + (set! *last-main-start* (current-seconds)) + (thread-sleep! 1)) + (thread-sleep! 0.25)) + (mutex-unlock! *connstart-mutex*) + (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries + ))) + (if (not the-srv) ;; have server, try connecting to it + (start-main-srv) + (let* ((srv-addr (server-address the-srv)) ;; need serv + (ipaddr (alist-ref 'ipaddr the-srv)) + (port (alist-ref 'port the-srv)) + (srvkey (alist-ref 'servkey the-srv)) + (fullpath (db:dbname->path apath dbname)) + + (new-the-srv (make-conndat + apath: apath + dbname: dbname + fullname: fullpath + hostport: srv-addr + ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvpkt: the-srv + srvkey: srvkey ;; generated by rmt:get-signature on the server side + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2) ;; this needs to be gathered during the ping + ))) + (hash-table-set! conns fullpath new-the-srv))) + #t))))) + +;; NB// sinfo is a servdat struct +;; +(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5)) + (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") + (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable + (fullname (db:dbname->path apath dbname)) + (conns (servdat-conns sinfo)) + (mconn (rmt:get-conn sinfo apath ".db/main.db")) + (dconn (rmt:get-conn sinfo apath dbname))) + #;(if (and mconn + (not (debug:print-logger))) + (begin + (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") + (debug:print-logger rmt:log-to-main))) + (cond + ((and mconn + dconn + (< (current-seconds)(conndat-expires dconn))) + #t) ;; good to go + ((not mconn) ;; no channel open to main? open it... + (rmt:open-main-connection sinfo apath) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + ((not dconn) ;; no channel open to dbname? + (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname)))) + (case res + ((server-started) + (if (> num-tries 0) + (begin + (thread-sleep! 2) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + (begin + (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) + (exit 1)))) + (else + (if (list? res) ;; server has been registered and the info was returned. pass it on. + (begin ;; ("192.168.0.9" 53817 + ;; "5e34239f48e8973b3813221e54701a01" "24310" + ;; "192.168.0.9" + ;; "/home/matt/data/megatest/tests/simplerun" + ;; ".db/1.db") + (match + res + ((host port servkey pid ipaddr apath dbname) + (debug:print-info 0 *default-log-port* "got "res) + (hash-table-set! conns + fullname + (make-conndat + apath: apath + dbname: dbname + hostport: (conc host":"port) + ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvkey: servkey + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2)))) + (else + (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) + res) + (begin + (debug:print-info 0 *default-log-port* "Unexpected result: " res) + res))))))) + #t)) + +;;====================================================================== + +;; FOR DEBUGGING SET TO #t +;; (define *localmode* #t) +(define *localmode* #f) +(define *dbstruct* (make-dbr:dbstruct)) + +;; Defaults to current area +;; +(define (rmt:send-receive-attempted-consolidation cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (sinfo *db-serv-info*) + (dbname (db:run-id->dbname rid))) + (if (not *db-serv-info*) + (begin + (set! *db-serv-info* (make-servdat)) + (set! sinfo *db-serv-info*))) + (rmt:open-main-connection sinfo apath) + (if rid (rmt:general-open-connection sinfo apath dbname)) + ;; (if (not (member cmd '(log-to-main))) + ;; (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) + (let* ((cdat (rmt:get-conn sinfo apath dbname))) + (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn (conndat-hostport cdat) cmd params))) + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -2)) ;; two second margin for network time misalignments etc. + res)))) + +; Defaults to current area +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (sinfo *db-serv-info*) + (dbname (db:run-id->dbname rid))) + (if (not *db-serv-info*) ;; confirm this is really needed + (begin + (set! *db-serv-info* (make-servdat)) + (set! sinfo *db-serv-info*))) + (rmt:open-main-connection sinfo apath) + (if rid (rmt:general-open-connection sinfo apath dbname)) + #;(if (not (member cmd '(log-to-main))) + (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) + (rmt:send-receive-real sinfo apath dbname cmd params))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future +;; +(define (rmt:send-receive-real sinfo apath dbname cmd params) + (let* ((cdat (rmt:get-conn sinfo apath dbname))) + (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn (conndat-hostport cdat) cmd params))) + ;; since we accessed the server we can bump the expires time up + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -2)) ;; two second margin for network time misalignments etc. + res))) + +; +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future. +;; +;; Purpose - call the main.db server and request a server be started +;; for the given area path and dbname +;; + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================") + (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +;; host and port are used to ensure we are remove proper records +(define (rmt:server-shutdown host port) + (let ((dbfile (servdat-dbfile *db-serv-info*))) + (debug:print-info 0 *default-log-port* "dbfile is "dbfile) + (if dbfile + (let* ((am-server (args:get-arg "-server")) + (dbfile (args:get-arg "-db")) + (apath *toppath*) + #;(sinfo *remotedat*)) ;; foundation for future fix + (if *dbstruct-db* + (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) + (db (dbr:dbdat-db dbdat)) + (inmem (dbr:dbdat-db dbdat)) ;; WRONG + ) + ;; do a final sync here + (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) + (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) + ;; let's finalize here + (debug:print-info 0 *default-log-port* "Finalizing db and inmem") + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) + (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) + (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") + (if (string-match ".*/main.db$" dbfile) + (let ((pkt-file (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) + (delete-file* pkt-file) + (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) + (db:with-lock-db + (servdat-dbfile *db-serv-info*) + (lambda (dbh dbfile) + (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove + (let* ((sdat *db-serv-info*) ;; we have a run-id server + (host (servdat-host sdat)) + (port (servdat-port sdat)) + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) + (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) + ))))))) + + +(define (common:run-sync?) + ;; (and (common:on-homehost?) + (args:get-arg "-server")) + +(define *rmt:run-mutex* (make-mutex)) +(define *rmt:run-flag* #f) + +(define (listener-running?) + (and *db-serv-info* + (servdat-uconn *db-serv-info*))) + +;; Main entry point to start a server. was start-server +(define (rmt:run hostn) + (mutex-lock! *rmt:run-mutex*) + (if *rmt:run-flag* + (begin + (debug:print-warn 0 *default-log-port* "rmt:run already running.") + (mutex-unlock! *rmt:run-mutex*)) + (begin + (set! *rmt:run-flag* #t) + (mutex-unlock! *rmt:run-mutex*) + ;; ;; Configurations for server + ;; (tcp-buffer-size 2048) + ;; (max-connections 2048) + (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") + (if (listener-running?) + (let* ((uconn (servdat-uconn *db-serv-info*))) + (wait-and-close uconn)) + (let* ((port (portlogger:open-run-close portlogger:find-port)) + (handler-proc (lambda (rem-host-port qrykey cmd params) ;; + (set! *db-last-access* (current-seconds)) + (assert (list? params) "FATAL: handler called with non-list params") + (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params) + (debug:print 0 *default-log-port* "handler call: "cmd", params="params) + (api:execute-requests *dbstruct-db* cmd params)))) + ;; (api:process-request *dbstuct-db* + (if (not *db-serv-info*) + (set! *db-serv-info* (make-servdat host: hostn port: port))) + (let* ((uconn (run-listener handler-proc port)) + (rport (udat-port uconn))) ;; the real port + (servdat-host-set! *db-serv-info* hostn) + (servdat-port-set! *db-serv-info* rport) + (servdat-uconn-set! *db-serv-info* uconn) + (wait-and-close uconn) + (db:print-current-query-stats) + ))) + (let* ((host (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (mode (or (servdat-mode *db-serv-info*) + "non-db"))) + ;; server exit stuff here + ;; (rmt:server-shutdown host port) - always do in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit + (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting") + )))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + + +;;====================================================================== +;; NEW SERVER METHOD +;;====================================================================== + +;; only use for main.db - need to re-write some of this :( +;; +(define (get-lock-db sdat dbfile host port) + (assert host "FATAL: get-lock-db called with host not set.") + (assert port "FATAL: get-lock-db called with port not set.") + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile host port)) + (uconn (servdat-uconn sdat))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? uconn (conc owner_host":"owner_port) "abc") + #f ;; locked by someone else + (begin ;; locked by someone dead and gone + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) + (sqlite3:finalize! dbh) + res)) + + +(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) + (let* ((pkt-dat `((host . ,host) + (port . ,port) + (servkey . ,servkey) + (pid . ,(current-process-id)) + (ipaddr . ,ipaddr) + (dbpath . ,dbpath))) + (uuid (write-alist->pkt + pkts-dir + pkt-dat + pktspec: pkt-spec + ptype: 'server))) + (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) + uuid)) + +(define (get-pkts-dir #!optional (apath #f)) + (let* ((effective-toppath (or *toppath* apath))) + (assert effective-toppath + "ERROR: get-pkts-dir called without *toppath* set. Exiting.") + (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) + (if (file-exists? pdir) + pdir + (begin + (handle-exceptions ;; this exception handler should NOT be needed but ... + exn + pdir + (create-directory pdir #t)) + pdir))))) + +;; given a pkts dir read +;; +(define (get-all-server-pkts pktsdir-in pktspec) + (let* ((pktsdir (if (file-exists? pktsdir-in) + pktsdir-in + (begin + (create-directory pktsdir-in #t) + pktsdir-in))) + (all-pkt-files (glob (conc pktsdir "/*.pkt")))) + (map (lambda (pkt-file) + (read-pkt->alist pkt-file pktspec: pktspec)) + all-pkt-files))) + +(define (server-address srv-pkt) + (conc (alist-ref 'host srv-pkt) ":" + (alist-ref 'port srv-pkt))) + +(define (server-ready? uconn host-port key) ;; server-address is host:port + (let* ((params `((cmd . ping)(key . ,key))) + (data `((cmd . ping) + (key . ,key) + (params . ,params))) ;; I don't get it. + (res (send-receive uconn host-port 'ping data))) + (if (eq? res 'ping-ack) ;; yep, likely it is who we want on the other end + res + #f))) +;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f)))) + +; from the pkts return servers associated with dbpath +;; NOTE: Only one can be alive - have to check on each +;; in the list of pkts returned +;; +(define (get-viable-servers serv-pkts dbpath) + (let loop ((tail serv-pkts) + (res '())) + (if (null? tail) + res ;; NOTE: sort by age so oldest is considered first + (let* ((spkt (car tail))) + (loop (cdr tail) + (if (equal? dbpath (alist-ref 'dbpath spkt)) + (cons spkt res) + res)))))) + +(define (remove-pkts-if-not-alive uconn serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (host-port (conc host":"port)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (server-ready? uconn host-port key))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) + +;; from viable servers get one that is alive and ready +;; +(define (get-the-server uconn apath serv-pkts) + (let loop ((tail serv-pkts)) + (if (null? tail) + #f + (let* ((spkt (car tail)) + (host (alist-ref 'ipaddr spkt)) + (port (alist-ref 'port spkt)) + (host-port (conc host":"port)) + (dbpth (alist-ref 'dbpath spkt)) + (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt)) + (addr (server-address spkt))) + (if (server-ready? uconn host-port srvkey) + spkt + (loop (cdr tail))))))) + +;; am I the "first" in line server? I.e. my D card is smallest +;; use Z card as tie breaker +;; +(define (get-best-candidate serv-pkts dbpath) + (if (null? serv-pkts) + #f + (let loop ((tail serv-pkts) + (best (car serv-pkts))) + (if (null? tail) + best + (let* ((candidate (car tail)) + (candidate-bd (string->number (alist-ref 'D candidate))) + (best-bd (string->number (alist-ref 'D best))) + ;; bigger number is younger + (candidate-z (alist-ref 'Z candidate)) + (best-z (alist-ref 'Z best)) + (new-best (cond + ((> best-bd candidate-bd) ;; best is younger than candidate + candidate) + ((< best-bd candidate-bd) ;; candidate is younger than best + best) + (else + (if (string>=? best-z candidate-z) + best + candidate))))) ;; use Z card as tie breaker + (if (null? tail) + new-best + (loop (cdr tail) new-best))))))) + +;;====================================================================== +;; END NEW SERVER METHOD +;;====================================================================== + +;; sdat must be defined and the host and port set and the same as previous +;; +(define (host-port-is-stable? sdat old-host old-port) + (and sdat + (let ((new-host (servdat-host sdat)) + (new-port (servdat-port sdat))) + (and new-host + new-port + (equal? new-host old-host) + (equal? new-port old-port))))) + +;; if .db/main.db check the pkts +;; +(define (rmt:wait-for-server pkts-dir db-file server-key) + (let* ((sdat *db-serv-info*)) + (let loop ((start-time (current-milliseconds)) + (changed #t) + (last-sdat "not this") + (last-host #f) + (last-port #f)) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *db-serv-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (>= (- (current-milliseconds) start-time) 100)) + (let* ((uconn (servdat-uconn sdat))) + (servdat-status-set! sdat 'iface-stable) + (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server") + ;; create a server pkt in *toppath*/.meta/srvpkts + + ;; TODO: + ;; 1. change sdat to stuct + ;; 2. add uuid to struct + ;; 3. update uuid in sdat here + ;; + (servdat-uuid-set! sdat + (register-server + pkts-dir *srvpktspec* + (get-host-name) + (servdat-port sdat) server-key + (servdat-host sdat) db-file)) + ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key + ;; now read pkts and see if we are a contender + (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) + (viables (get-viable-servers all-pkts db-file)) + (alive (remove-pkts-if-not-alive uconn viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) + ;; am I the best-srv, compare server-keys to know + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (begin + (debug:print-info 0 *default-log-port* "I'm the server!") + (servdat-dbfile-set! sdat db-file) + (servdat-status-set! sdat 'db-locked)) + (begin + (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + (begin + (debug:print-info 0 *default-log-port* + "Keys do not match "best-srv-key", "server-key", exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + sdat)) + (begin ;; sdat not yet contains server info + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (thread-sleep! 0.1) + (if (> (- (current-milliseconds) start-time) 120000) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (exit)) + (loop start-time + (not (host-port-is-stable? sdat last-host last-port)) + sdat + (servdat-host sdat) + (servdat-port sdat))))))))) + +(define (rmt:register-server sinfo apath iface port server-key dbname) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'register-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:get-count-servers sinfo apath) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) + +(define (rmt:deregister-server db-serv-info apath iface port server-key dbname) + (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db + (rmt:send-receive db-serv-info apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'deregister-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) + ;; wait until *db-serv-info* stops changing + (let* ((stime (current-seconds))) + (let loop ((last-host #f) + (last-port #f) + (tries 0)) + (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*))) + (curr-port (and *db-serv-info* (servdat-port *db-serv-info*)))) + ;; first we verify port and interface, update *db-serv-info* in need be. + (cond + ((> tries num-tries-allowed) + (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") + (exit 1)) + ((not *db-serv-info*) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not last-host)(not last-port)) + (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not (equal? last-host curr-host)) + (not (equal? last-port curr-port))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed + (thread-sleep! 0.5) + (loop curr-host curr-port (+ tries 1))) + (else + (rmt:get-signature) ;; sets *my-signature* as side effect + (servdat-status-set! *db-serv-info* 'interface-stable) + (debug:print 0 *default-log-port* + "SERVER STARTED: " curr-host + ":" curr-port + " AT " (current-seconds) " server signature: " *my-signature* + " with "(servdat-trynum *db-serv-info*)" port changes") + (flush-output *default-log-port*) + #t)))))) + +;; run rmt:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (rmt:keep-running dbname) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + + (let* ((sinfo *db-serv-info*) + (server-start-time (current-seconds)) + (pkts-dir (get-pkts-dir)) + (server-key (rmt:get-signature)) ;; This servers key + (is-main (equal? (args:get-arg "-db") ".db/main.db")) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (host port) + (set! *unclean-shutdown* #f) ;; Should not be needed anymore + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; (rmt:server-shutdown host port) -- called in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) + (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db")) + ;; main and run db servers have both got wait logic (could/should merge it) + (if is-main + (rmt:wait-for-server pkts-dir dbname server-key) + (rmt:wait-for-stable-interface)) + ;; this is our forever loop + (let* ((iface (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (uconn (servdat-uconn *db-serv-info*))) + (let loop ((count 0) + (bad-sync-count 0) + (start-time (current-milliseconds))) + (if (and (not is-main) + (common:low-noise-print 60 "servdat-status")) + (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*))) + + (mutex-lock! *heartbeat-mutex*) + ;; set up the database handle + (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate + (let ((watchdog #f)) ;; (bdat-watchdog *bdat*))) + (debug:print 0 *default-log-port* "SERVER: dbprep") + (db:setup dbname) ;; sets *dbstruct-db* as side effect + (servdat-status-set! *db-serv-info* 'db-opened) + ;; IFF I'm not main, call into main and register self + (if (not is-main) + (let ((res (rmt:register-server sinfo + *toppath* iface port + server-key dbname))) + (if res ;; we are the server + (servdat-status-set! *db-serv-info* 'have-interface-and-db) + ;; now check that the db locker is alive, clear it out if not + (let* ((serv-info (rmt:server-info *toppath* dbname))) + (match serv-info + ((host port servkey pid ipaddr apath dbpath) + (if (not (server-ready? uconn (conc host":"port) servkey)) + (begin + (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") + (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) + (loop (+ count 1) bad-sync-count start-time)))) + (else + (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) + (exit))))))) + (debug:print 0 *default-log-port* + "SERVER: running, db "dbname" opened, megatest version: " + (common:get-full-version)) + )) + + (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) + + (mutex-unlock! *heartbeat-mutex*) + + ;; when things go wrong we don't want to be doing the various + ;; queries too often so we strive to run this stuff only every + ;; four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (set! last-access *db-last-access*) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) bad-sync-count (current-milliseconds))) + + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence (get-host-name) port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port)) + ((and *server-run* + (or (not (timed-out?)) + (if is-main ;; do not exit if there are other servers (keep main open until all others gone) + (> (rmt:get-count-servers sinfo *toppath*) 1) + #f))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) + (loop 0 bad-sync-count (current-milliseconds))) + (else + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port) + #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " + (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown + (sexpr->string 'quit)))))))))) + +(define (rmt:get-reasonable-hostname) + (let* ((inhost (or (args:get-arg "-server") "-"))) + (if (equal? inhost "-") + (get-host-name) + inhost))) + +;; Call this to start the actual server +;; +;; all routes though here end in exit ... +;; +;; This is the point at which servers are started +;; +(define (rmt:server-launch dbname) + (debug:print-info 0 *default-log-port* "Entered rmt:server-launch") + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (rmt:run (rmt:get-reasonable-hostname))) + "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (if (args:get-arg "-server") + (rmt:keep-running dbname) + #;(rmt:wait-for-stable-interface))) + "Keep running"))) + (thread-start! th2) + (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (thread-join! th3)) + #f) + +;;====================================================================== +;; S E R V E R - D I R E C T C A L L S +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server #f (list run-id))) + +(define (rmt:start-server run-id) + (rmt:send-receive 'start-server #f (list run-id))) + +(define (rmt:server-info apath dbname) + (rmt:send-receive 'get-server-info #f (list apath dbname))) + + +) ADDED ulex-dual/ulex.dot Index: ulex-dual/ulex.dot ================================================================== --- /dev/null +++ ulex-dual/ulex.dot @@ -0,0 +1,136 @@ +// Copyright 2006-2017, Matthew Welland. +// +// This file is part of Megatest. +// +// Megatest is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Megatest is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Megatest. If not, see . + +digraph G { + + // graph[center=true, margin=0.2, nodesep=0.1, ranksep=0.3] + + layout=neato; + // layout=fdp; + // overlap=scalexy; //false, compress, ... + overlap=scalexy; + // sep="+1"; // 0.1, +1 + sep="-0.1"; + + user_program [label="user program"]; + + subgraph cluster_1 { + node [style=filled,shape=oval]; + label = "caller"; + color=brown; + + send_receive [label="(send-receive uconn\n host-port qrykey cmd data)"]; + send [label="(send uconn host-port\n qrykey cmd data)"]; + ulex_cmd_loopcaller [label="(ulex-cmd-loop uconn)"]; + ulex_handlercaller [label="(ulex-handler uconn rdat)"]; + mailbox [label="mailbox\n\nrdat\n...",shape=box]; + + send_receive -> send; + ulex_cmd_loopcaller -> ulex_handlercaller; + ulex_handlercaller -> mailbox; + mailbox -> send_receive; + } + + subgraph cluster_2 { + node [shape=oval]; + label = "listener"; + color=green; + + ulex_cmd_loop [label="(ulex-cmd-loop uconn)"]; + ulex_handler [label="(ulex-handler \nuconn rdat)"]; + add_to_work_queue [label="(add-to-work-queue\n uconn rdat)"]; + queue [label="queue\n\nrdat\n...",shape=box]; + process_work_queue [label="(process-work-queue uconn)"]; + do_work [label="(do-work uconn rdat)\nrdat: '(rem-host-port qrykey cmd params)"]; + user_proc [label="(proc rem-host-port\n qrykey cmd params)\n;; proc supplied by user"]; + sendlis [label="(send uconn host-port\n qrykey 'response result)"]; + + ulex_cmd_loop -> ulex_handler [label="rdat"]; + ulex_handler -> add_to_work_queue [label="rdat"]; + + add_to_work_queue -> queue [label="rdat"]; + + subgraph cluster_3 { + label = "remote work"; + color=blue; + + queue -> process_work_queue [label="rdat"]; + process_work_queue -> do_work [label="rdat"]; + do_work -> user_proc; // [label="rdat: '(rem-host-port\n qrykey cmd params)"]; + } + } + + user_proc -> sendlis; + user_program -> send_receive; + send_receive -> user_program; + + send -> ulex_cmd_loop [label="rdat: '(host-port\n qrykey cmd data)"]; + sendlis -> ulex_cmd_loopcaller [label="rdat: '(host-port qrykey\n 'response result)"]; + ulex_handler -> send [label="'ack"]; + ulex_handlercaller -> sendlis [label="'ack"]; + +} + + +// check_available_queue -> remove_entries_over_10s_old; +// remove_entries_over_10s_old -> set_available [label="num_avail < 3"]; +// remove_entries_over_10s_old -> exit [label="num_avail > 2"]; +// +// set_available -> delay_2s; +// delay_2s -> check_place_in_queue; +// +// check_place_in_queue -> "http:transport-launch" [label="at head"]; +// check_place_in_queue -> exit [label="not at head"]; +// +// "client:login" -> "server:shutdown" [label="login failed"]; +// "server:shutdown" -> exit; +// +// subgraph cluster_2 { +// "http:transport-launch" -> "http:transport-run"; +// "http:transport-launch" -> "http:transport-keep-running"; +// +// "http:transport-keep-running" -> "tests running?"; +// "tests running?" -> "client:login" [label=yes]; +// "tests running?" -> "server:shutdown" [label=no]; +// "client:login" -> delay_5s [label="login ok"]; +// delay_5s -> "http:transport-keep-running"; +// } +// + // start_server -> "server_running?"; + // "server_running?" -> set_available [label="no"]; + // "server_running?" -> delay_2s [label="yes"]; + // delay_2s -> "still_running?"; + // "still_running?" -> ping_server [label=yes]; + // "still_running?" -> set_available [label=no]; + // ping_server -> exit [label=alive]; + // ping_server -> remove_server_record [label=dead]; + // remove_server_record -> set_available; + // set_available -> avail_delay [label="delay 3s"]; + // avail_delay -> "first_in_queue?"; + // + // "first_in_queue?" -> set_running [label=yes]; + // set_running -> get_next_port -> handle_requests; + // "first_in_queue?" -> "dead_entry_in_queue?" [label=no]; + // "dead_entry_in_queue?" -> "server_running?" [label=no]; + // "dead_entry_in_queue?" -> "remove_dead_entries" [label=yes]; + // remove_dead_entries -> "server_running?"; + // + // handle_requests -> start_shutdown [label="no traffic\nno running tests"]; + // handle_requests -> shutdown_request; + // start_shutdown -> shutdown_delay; + // shutdown_request -> shutdown_delay; + // shutdown_delay -> exit; ADDED ulex-dual/ulex.pdf Index: ulex-dual/ulex.pdf ================================================================== --- /dev/null +++ ulex-dual/ulex.pdf cannot compute difference between binary files ADDED ulex-dual/ulex.png Index: ulex-dual/ulex.png ================================================================== --- /dev/null +++ ulex-dual/ulex.png cannot compute difference between binary files ADDED ulex-dual/ulex.scm Index: ulex-dual/ulex.scm ================================================================== --- /dev/null +++ ulex-dual/ulex.scm @@ -0,0 +1,352 @@ +;; ulex: Distributed sqlite3 db +;;; +;; Copyright (C) 2018-2021 Matt Welland +;; Redistribution and use in source and binary forms, with or without +;; modification, is permitted. +;; +;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +;; DAMAGE. + +;;====================================================================== +;; ABOUT: +;; See README in the distribution at https://www.kiatoa.com/fossils/ulex +;; NOTES: +;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. +;; +;;====================================================================== + +(module ulex + * + #;( + + ;; NOTE: looking for the handler proc - find the run-listener :) + + run-listener ;; (run-listener handler-proc [port]) => uconn + + ;; NOTE: handler-proc params; + ;; (handler-proc rem-host-port qrykey cmd params) + + send-receive ;; (send-receive uconn host-port cmd data) + + ;; NOTE: cmd can be any plain text symbol except for these; + ;; 'ping 'ack 'goodbye 'response + + set-work-handler ;; (set-work-handler proc) + + wait-and-close ;; (wait-and-close uconn) + + ulex-listener? + + ;; needed to get the interface:port that was automatically found + udat-port + udat-host-port + + ;; for testing only + ;; pp-uconn + + ;; parameters + work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct + return-method ;; parameter; 'mailbox, 'polling, 'direct + ) + +(import scheme + chicken.base + chicken.file + chicken.io + chicken.time + chicken.condition + chicken.port + chicken.string + chicken.sort + chicken.pretty-print + chicken.tcp + + address-info + mailbox + matchable + ;; queues + regex + regex-case + simple-exceptions + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + system-information + ;; tcp6 + tcp-server + typed-records + + md5 + message-digest + (prefix base64 base64:) + z3 + ) + +;; udat struct, used by both caller and callee +;; instantiated as uconn by convention +;; +(defstruct udat + ;; the listener side + (port #f) + (host-port #f) ;; my host:port + (socket #f) + ;; the peers + (peers (make-hash-table)) ;; host:port->peer + ;; work handling + (work-queue (make-mailbox)) + (work-proc #f) ;; set by user + (cnum 0) ;; cookie number + (mboxes (make-hash-table)) ;; for the replies + (avail-cmboxes '()) ;; list of ( . ) for re-use + ;; threads + (numthreads 10) + (cmd-thread #f) + (work-queue-thread #f) + (num-threads-running 0) + ) + +;;====================================================================== +;; serialization +;; NOTE: I've had problems with read/write and s11n serialize, deserialize +;; thus the inefficient method here +;;====================================================================== + +(define serializing-method (make-parameter 'complex)) + + +;; NOTE: Can remove the regex and base64 encoding for zmq +(define (obj->string obj) + (case (serializing-method) + ((complex) + (string-substitute + (regexp "=") "_" + (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda ()(serialize obj))))) ;; BB: serialize - this is + ;; what causes problems + ;; between different builds of + ;; megatest communicating. + ;; serialize is sensitive to + ;; binary image of mtest. + #t)) + ((write)(with-output-to-string (lambda ()(write obj)))) + ((s11n) (with-output-to-string (lambda ()(serialize obj)))) + (else obj))) ;; rpc + +(define (string->obj msg #!key (transport 'http)) + (case (serializing-method) + ((complex) + (handle-exceptions + exn + (begin + (print "ULEX ERROR: cannot translate received data \""msg"\"") + (print-call-chain (current-error-port)) + msg) + (with-input-from-string + (z3:decode-buffer + (base64:base64-decode + (string-substitute + (regexp "_") "=" msg #t))) + (lambda ()(deserialize))))) + ((write)(with-input-from-string msg (lambda ()(read)))) + ((s11n)(with-input-from-string msg (lambda ()(deserialize)))) + (else msg))) ;; rpc + + +;;====================================================================== +;; listener +;;====================================================================== + +;; is uconn a ulex connector (listener) +;; +(define (ulex-listener? uconn) + (udat? uconn)) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (setup-listener uconn #!optional (port 4242)) + (handle-exceptions + exn + (if (< port 65535) + (setup-listener uconn (+ port 1)) + #f) + (connect-listener uconn port))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (udat-port-set! uconn port) + (udat-host-port-set! uconn (conc addr":"port)) + (udat-socket-set! uconn tlsn) + uconn)) + +;; run-listener does all the work of starting a listener in a thread +;; it then returns control +;; +(define (run-listener handler-proc #!optional (port-suggestion 4242)) + (let* ((uconn (make-udat))) + (udat-work-proc-set! uconn handler-proc) + (if (setup-listener uconn port-suggestion) + (let* ((orig-in (current-input-port)) + (orig-out (current-output-port))) + ((make-tcp-server + (udat-socket uconn) + (lambda () + (let* ((rdat + (string->obj (read)) + ;; (read in) + ;; (deserialize) + ) + (resp (let ((tcp-in (current-input-port)) + (tcp-out (current-output-port))) + (current-input-port orig-in) + (current-output-port orig-out) + (let ((res (do-work uconn rdat))) + (current-input-port tcp-in) + (current-output-port tcp-out) + res)))) + (write (obj->string resp)) + ;; (serialize resp) + ;; (write resp out) + ))))) + (assert #f "ERROR: run-listener called without proper setup.")))) + +(define (wait-and-close uconn) + (thread-join! (udat-cmd-thread uconn)) + (tcp-close (udat-socket uconn))) + +;;======================================================================== +;; peers and connections +;;======================================================================== + +(define *send-mutex* (make-mutex)) + +;; send structured data to recipient +;; +;; NOTE: qrykey is what was called the "cookie" previously +;; +;; retval tells send to expect and wait for return data (one line) and return it or time out +;; this is for ping where we don't want to necessarily have set up our own server yet. +;; +(define (send-receive udata host-port cmd params) + (let* ((host-port-lst (string-split host-port ":")) + (host (car host-port-lst)) + (port (string->number (cadr host-port-lst))) + (my-host-port (and udata (udat-host-port udata))) ;; remote will return to this + (isme (equal? host-port my-host-port)) ;; calling myself? + ;; dat is a self-contained work block that can be sent or handled locally + (dat (list `(host-port . ,my-host-port) + `(qrykey . qrykey) + `(cmd . ,cmd) + `(params . ,params)))) + (cond + (isme (do-work udata dat)) ;; no transmission needed + (else + (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? + exn + (begin + (print "ULEX send-receive: "cmd", "params", exn="exn) + (message exn)) + (begin + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP + (let-values (((inp oup)(tcp-connect host port))) + (let ((res (if (and inp oup) + (begin + (write (obj->string dat) oup) + (close-output-port oup) + (string->obj (read inp))) + (begin + (print "ERROR: send called but no receiver has been setup. Please call setup first!") + #f)))) + (close-input-port inp) + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + res)))))))) ;; res will always be 'ack unless return-method is direct + +;;====================================================================== +;; work queues - this is all happening on the listener side +;;====================================================================== + +;; move the logic to return the result somewhere else? +;; +(define (do-work uconn rdat) + ;; put this following into a do-work procedure + (match rdat + ((rem-host-port qrykey cmd params) + (case cmd + ((ping) 'ping-ack) ;; bypass calling the proc + (else + (let* ((proc (udat-work-proc uconn)) + (start-time (current-milliseconds)) + (result (with-output-to-port (current-error-port) + (lambda () + (proc rem-host-port qrykey cmd params)))) + (end-time (current-milliseconds)) + (run-time (- end-time start-time))) + result)))) + (else + (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))) + +;;====================================================================== +;; misc utils +;;====================================================================== + +(define (pp-uconn uconn) + (pp (udat->alist uconn))) + +;;====================================================================== +;; network utilities +;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + +(define (get-my-best-address) + (let ((all-my-addresses (get-all-ips))) + (cond + ((null? all-my-addresses) + (get-host-name)) ;; no interfaces? + ((eq? (length all-my-addresses) 1) + (car all-my-addresses)) ;; only one to choose from, just go with it + (else + (car (sort all-my-addresses ip-pref-less?)))))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips) + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +) ADDED ulex-full/dbmgr.scm Index: ulex-full/dbmgr.scm ================================================================== --- /dev/null +++ ulex-full/dbmgr.scm @@ -0,0 +1,1131 @@ +;;====================================================================== +;; Copyright 2022, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbmgrmod)) + +(declare (uses ulex)) +(declare (uses apimod)) +(declare (uses pkts)) +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses portloggermod)) +(declare (uses debugprint)) + +(module dbmgrmod + * + +(import scheme + chicken.base + chicken.condition + chicken.file + chicken.format + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + + (prefix sqlite3 sqlite3:) + matchable + md5 + message-digest + regex + s11n + srfi-1 + srfi-18 + srfi-69 + system-information + typed-records + + pkts + ulex + + commonmod + apimod + dbmod + debugprint + (prefix mtargs args:) + portloggermod + ) + +;; Configurations for server +;; (tcp-buffer-size 2048) +;; (max-connections 2048) + +;; info about me as a listener and my connections to db servers +;; stored (for now) in *db-serv-info* +;; +(defstruct servdat + (host #f) + (port #f) + (uuid #f) + (dbfile #f) + (uconn #f) ;; this is the listener *FOR THIS PROCESS* + (mode #f) + (status 'starting) + (trynum 0) ;; count the number of ports we've tried + (conns (make-hash-table)) ;; apath/dbname => conndat + ) + +(define *db-serv-info* (make-servdat)) + +(define (servdat->url sdat) + (conc (servdat-host sdat)":"(servdat-port sdat))) + +;; db servers contact info +;; +(defstruct conndat + (apath #f) + (dbname #f) + (fullname #f) + (hostport #f) + (ipaddr #f) + (port #f) + (srvpkt #f) + (srvkey #f) + (lastmsg 0) + (expires 0)) + +(define *srvpktspec* + `((server (host . h) + (port . p) + (servkey . k) + (pid . i) + (ipaddr . a) + (dbpath . d)))) + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +;; set up the api proc, seems like there should be a better place for this? +;; +;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE +;; +;; (define api-proc (make-parameter conc)) +;; (api-proc api:execute-requests) + +;; do we have a connection to apath dbname and +;; is it not expired? then return it +;; +;; else setup a connection +;; +;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception +;; +(define (rmt:get-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-ref/default (servdat-conns remdat) fullname #f))) + +(define (rmt:drop-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-delete! (servdat-conns remdat) fullname))) + +(define (rmt:find-main-server uconn apath dbname) + (let* ((pktsdir (get-pkts-dir apath)) + (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) + (viable-srvs (get-viable-servers all-srvpkts dbname))) + (get-the-server uconn apath viable-srvs))) + + +(define *connstart-mutex* (make-mutex)) +(define *last-main-start* 0) + +;; looks for a connection to main, returns if have and not exired +;; creates new otherwise +;; +;; connections for other servers happens by requesting from main +;; +;; TODO: This is unnecessarily re-creating the record in the hash table +;; +(define (rmt:open-main-connection remdat apath) + (let* ((fullpath (db:dbname->path apath ".db/main.db")) + (conns (servdat-conns remdat)) + (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this + (start-rmt:run (lambda () + (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server"))) + (thread-start! th1) + (thread-sleep! 1) + (let loop ((count 0)) + (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection") + (if (or (not *db-serv-info*) + (not (servdat-uconn *db-serv-info*))) + (begin + (thread-sleep! 1) + (loop (+ count 1))) + (begin + (servdat-mode-set! *db-serv-info* 'non-db) + (servdat-uconn *db-serv-info*))))))) + (myconn (servdat-uconn *db-serv-info*))) + (cond + ((not myconn) + (start-rmt:run) + (rmt:open-main-connection remdat apath)) + ((and conn ;; conn is NOT a socket, just saying ... + (< (current-seconds) (conndat-expires conn))) + #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died + ((and conn + (>= (current-seconds)(conndat-expires conn))) + (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") + (rmt:drop-conn remdat apath ".db/main.db") ;; + (rmt:open-main-connection remdat apath)) + (else + ;; Below we will find or create and connect to main + (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch") + (let* ((dbname (db:run-id->dbname #f)) + (the-srv (rmt:find-main-server myconn apath dbname)) + (start-main-srv (lambda () ;; call IF there is no the-srv found + (mutex-lock! *connstart-mutex*) + (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server + (begin + (api:run-server-process apath dbname) + (set! *last-main-start* (current-seconds)) + (thread-sleep! 1)) + (thread-sleep! 0.25)) + (mutex-unlock! *connstart-mutex*) + (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries + ))) + (if (not the-srv) ;; have server, try connecting to it + (start-main-srv) + (let* ((srv-addr (server-address the-srv)) ;; need serv + (ipaddr (alist-ref 'ipaddr the-srv)) + (port (alist-ref 'port the-srv)) + (srvkey (alist-ref 'servkey the-srv)) + (fullpath (db:dbname->path apath dbname)) + + (new-the-srv (make-conndat + apath: apath + dbname: dbname + fullname: fullpath + hostport: srv-addr + ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvpkt: the-srv + srvkey: srvkey ;; generated by rmt:get-signature on the server side + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2) ;; this needs to be gathered during the ping + ))) + (hash-table-set! conns fullpath new-the-srv))) + #t))))) + +;; NB// sinfo is a servdat struct +;; +(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5)) + (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") + (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable + (fullname (db:dbname->path apath dbname)) + (conns (servdat-conns sinfo)) + (mconn (rmt:get-conn sinfo apath ".db/main.db")) + (dconn (rmt:get-conn sinfo apath dbname))) + #;(if (and mconn + (not (debug:print-logger))) + (begin + (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") + (debug:print-logger rmt:log-to-main))) + (cond + ((and mconn + dconn + (< (current-seconds)(conndat-expires dconn))) + #t) ;; good to go + ((not mconn) ;; no channel open to main? open it... + (rmt:open-main-connection sinfo apath) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + ((not dconn) ;; no channel open to dbname? + (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname)))) + (case res + ((server-started) + (if (> num-tries 0) + (begin + (thread-sleep! 2) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + (begin + (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) + (exit 1)))) + (else + (if (list? res) ;; server has been registered and the info was returned. pass it on. + (begin ;; ("192.168.0.9" 53817 + ;; "5e34239f48e8973b3813221e54701a01" "24310" + ;; "192.168.0.9" + ;; "/home/matt/data/megatest/tests/simplerun" + ;; ".db/1.db") + (match + res + ((host port servkey pid ipaddr apath dbname) + (debug:print-info 0 *default-log-port* "got "res) + (hash-table-set! conns + fullname + (make-conndat + apath: apath + dbname: dbname + hostport: (conc host":"port) + ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvkey: servkey + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2)))) + (else + (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) + res) + (begin + (debug:print-info 0 *default-log-port* "Unexpected result: " res) + res))))))) + #t)) + +;;====================================================================== + +;; FOR DEBUGGING SET TO #t +;; (define *localmode* #t) +(define *localmode* #f) +(define *dbstruct* (make-dbr:dbstruct)) + +;; Defaults to current area +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (sinfo *db-serv-info*) + (dbname (db:run-id->dbname rid))) + (if *localmode* + (api:execute-requests *dbstruct* cmd params) + (begin + (rmt:open-main-connection sinfo apath) + (if rid (rmt:general-open-connection sinfo apath dbname)) + #;(if (not (member cmd '(log-to-main))) + (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) + (rmt:send-receive-real sinfo apath dbname cmd params))))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future +;; +(define (rmt:send-receive-real sinfo apath dbname cmd params) + (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.") + (let* ((cdat (rmt:get-conn sinfo apath dbname))) + (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn (conndat-hostport cdat) cmd params)) + #;(th1 (make-thread (lambda () + (set! res (send-receive uconn (conndat-hostport cdat) cmd params))) + "send-receive thread"))) + ;; (thread-start! th1) + ;; (thread-join! th1) ;; gratuitious thread stuff is so that mailbox is not used in primordial thead + ;; since we accessed the server we can bump the expires time up + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -2)) ;; two second margin for network time misalignments etc. + res))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future. +;; +;; Purpose - call the main.db server and request a server be started +;; for the given area path and dbname +;; + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================") + (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +;; host and port are used to ensure we are remove proper records +(define (rmt:server-shutdown host port) + (let ((dbfile (servdat-dbfile *db-serv-info*))) + (debug:print-info 0 *default-log-port* "dbfile is "dbfile) + (if dbfile + (let* ((am-server (args:get-arg "-server")) + (dbfile (args:get-arg "-db")) + (apath *toppath*) + #;(sinfo *remotedat*)) ;; foundation for future fix + (if *dbstruct-db* + (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) + (db (dbr:dbdat-db dbdat)) + (inmem (dbr:dbdat-db dbdat)) ;; WRONG + ) + ;; do a final sync here + (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) + (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) + ;; let's finalize here + (debug:print-info 0 *default-log-port* "Finalizing db and inmem") + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) + (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) + (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") + (if (string-match ".*/main.db$" dbfile) + (let ((pkt-file (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) + (delete-file* pkt-file) + (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) + (db:with-lock-db + (servdat-dbfile *db-serv-info*) + (lambda (dbh dbfile) + (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove + (let* ((sdat *db-serv-info*) ;; we have a run-id server + (host (servdat-host sdat)) + (port (servdat-port sdat)) + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) + (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) + ))))))) + + +(define (common:run-sync?) + ;; (and (common:on-homehost?) + (args:get-arg "-server")) + +(define *rmt:run-mutex* (make-mutex)) +(define *rmt:run-flag* #f) + +;; Main entry point to start a server. was start-server +(define (rmt:run hostn) + (mutex-lock! *rmt:run-mutex*) + (if *rmt:run-flag* + (begin + (debug:print-warn 0 *default-log-port* "rmt:run already running.") + (mutex-unlock! *rmt:run-mutex*)) + (begin + (set! *rmt:run-flag* #t) + (mutex-unlock! *rmt:run-mutex*) + ;; ;; Configurations for server + ;; (tcp-buffer-size 2048) + ;; (max-connections 2048) + (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") + (if (and *db-serv-info* + (servdat-uconn *db-serv-info*)) + (let* ((uconn (servdat-uconn *db-serv-info*))) + (wait-and-close uconn)) + (let* ((port (portlogger:open-run-close portlogger:find-port)) + (handler-proc (lambda (rem-host-port qrykey cmd params) ;; + (set! *db-last-access* (current-seconds)) + (assert (list? params) "FATAL: handler called with non-list params") + (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params) + (debug:print 0 *default-log-port* "handler call: "cmd", params="params) + (api:execute-requests *dbstruct-db* cmd params)))) + ;; (api:process-request *dbstuct-db* + (if (not *db-serv-info*) + (set! *db-serv-info* (make-servdat host: hostn port: port))) + (let* ((uconn (run-listener handler-proc port)) + (rport (udat-port uconn))) ;; the real port + (servdat-host-set! *db-serv-info* hostn) + (servdat-port-set! *db-serv-info* rport) + (servdat-uconn-set! *db-serv-info* uconn) + (wait-and-close uconn) + (db:print-current-query-stats) + ))) + (let* ((host (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (mode (or (servdat-mode *db-serv-info*) + "non-db"))) + ;; server exit stuff here + ;; (rmt:server-shutdown host port) - always do in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit + (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting") + )))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + + +;;====================================================================== +;; NEW SERVER METHOD +;;====================================================================== + +;; only use for main.db - need to re-write some of this :( +;; +(define (get-lock-db sdat dbfile host port) + (assert host "FATAL: get-lock-db called with host not set.") + (assert port "FATAL: get-lock-db called with port not set.") + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile host port)) + (uconn (servdat-uconn sdat))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? uconn (conc owner_host":"owner_port) "abc") + #f ;; locked by someone else + (begin ;; locked by someone dead and gone + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) + (sqlite3:finalize! dbh) + res)) + + +(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) + (let* ((pkt-dat `((host . ,host) + (port . ,port) + (servkey . ,servkey) + (pid . ,(current-process-id)) + (ipaddr . ,ipaddr) + (dbpath . ,dbpath))) + (uuid (write-alist->pkt + pkts-dir + pkt-dat + pktspec: pkt-spec + ptype: 'server))) + (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) + uuid)) + +(define (get-pkts-dir #!optional (apath #f)) + (let* ((effective-toppath (or *toppath* apath))) + (assert effective-toppath + "ERROR: get-pkts-dir called without *toppath* set. Exiting.") + (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) + (if (file-exists? pdir) + pdir + (begin + (handle-exceptions ;; this exception handler should NOT be needed but ... + exn + pdir + (create-directory pdir #t)) + pdir))))) + +;; given a pkts dir read +;; +(define (get-all-server-pkts pktsdir-in pktspec) + (let* ((pktsdir (if (file-exists? pktsdir-in) + pktsdir-in + (begin + (create-directory pktsdir-in #t) + pktsdir-in))) + (all-pkt-files (glob (conc pktsdir "/*.pkt")))) + (map (lambda (pkt-file) + (read-pkt->alist pkt-file pktspec: pktspec)) + all-pkt-files))) + +(define (server-address srv-pkt) + (conc (alist-ref 'host srv-pkt) ":" + (alist-ref 'port srv-pkt))) + +(define (server-ready? uconn host-port key) ;; server-address is host:port + (let* ((params `((cmd . ping)(key . ,key))) + (data `((cmd . ping) + (key . ,key) + (params . ,params))) ;; I don't get it. + (res (send-receive uconn host-port 'ping data))) + (if (eq? res 'ack) ;; yep, likely it is who we want on the other end + res + #f))) +;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f)))) + +; from the pkts return servers associated with dbpath +;; NOTE: Only one can be alive - have to check on each +;; in the list of pkts returned +;; +(define (get-viable-servers serv-pkts dbpath) + (let loop ((tail serv-pkts) + (res '())) + (if (null? tail) + res ;; NOTE: sort by age so oldest is considered first + (let* ((spkt (car tail))) + (loop (cdr tail) + (if (equal? dbpath (alist-ref 'dbpath spkt)) + (cons spkt res) + res)))))) + +(define (remove-pkts-if-not-alive uconn serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (host-port (conc host":"port)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (server-ready? uconn host-port key))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) + +;; from viable servers get one that is alive and ready +;; +(define (get-the-server uconn apath serv-pkts) + (let loop ((tail serv-pkts)) + (if (null? tail) + #f + (let* ((spkt (car tail)) + (host (alist-ref 'ipaddr spkt)) + (port (alist-ref 'port spkt)) + (host-port (conc host":"port)) + (dbpth (alist-ref 'dbpath spkt)) + (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt)) + (addr (server-address spkt))) + (if (server-ready? uconn host-port srvkey) + spkt + (loop (cdr tail))))))) + +;; am I the "first" in line server? I.e. my D card is smallest +;; use Z card as tie breaker +;; +(define (get-best-candidate serv-pkts dbpath) + (if (null? serv-pkts) + #f + (let loop ((tail serv-pkts) + (best (car serv-pkts))) + (if (null? tail) + best + (let* ((candidate (car tail)) + (candidate-bd (string->number (alist-ref 'D candidate))) + (best-bd (string->number (alist-ref 'D best))) + ;; bigger number is younger + (candidate-z (alist-ref 'Z candidate)) + (best-z (alist-ref 'Z best)) + (new-best (cond + ((> best-bd candidate-bd) ;; best is younger than candidate + candidate) + ((< best-bd candidate-bd) ;; candidate is younger than best + best) + (else + (if (string>=? best-z candidate-z) + best + candidate))))) ;; use Z card as tie breaker + (if (null? tail) + new-best + (loop (cdr tail) new-best))))))) + + +;;====================================================================== +;; END NEW SERVER METHOD +;;====================================================================== + +;; if .db/main.db check the pkts +;; +(define (rmt:wait-for-server pkts-dir db-file server-key) + (let* ((sdat *db-serv-info*)) + (let loop ((start-time (current-seconds)) + (changed #t) + (last-sdat "not this")) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *db-serv-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (> (- (current-seconds) start-time) 2)) + (let* ((uconn (servdat-uconn sdat))) + (servdat-status-set! sdat 'iface-stable) + (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server") + ;; create a server pkt in *toppath*/.meta/srvpkts + + ;; TODO: + ;; 1. change sdat to stuct + ;; 2. add uuid to struct + ;; 3. update uuid in sdat here + ;; + (servdat-uuid-set! sdat + (register-server + pkts-dir *srvpktspec* + (get-host-name) + (servdat-port sdat) server-key + (servdat-host sdat) db-file)) + ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key + ;; now read pkts and see if we are a contender + (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) + (viables (get-viable-servers all-pkts db-file)) + (alive (remove-pkts-if-not-alive uconn viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) + ;; am I the best-srv, compare server-keys to know + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (begin + (debug:print-info 0 *default-log-port* "I'm the server!") + (servdat-dbfile-set! sdat db-file) + (servdat-status-set! sdat 'db-locked)) + (begin + (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + (begin + (debug:print-info 0 *default-log-port* + "Keys do not match "best-srv-key", "server-key", exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + sdat)) + (begin ;; sdat not yet contains server info + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (sleep 4) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (exit)) + (loop start-time + (equal? sdat last-sdat) + sdat)))))))) + +(define (rmt:register-server sinfo apath iface port server-key dbname) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'register-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:get-count-servers sinfo apath) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) + +(define (rmt:deregister-server db-serv-info apath iface port server-key dbname) + (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db + (rmt:send-receive-real db-serv-info apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'deregister-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) + ;; wait until *db-serv-info* stops changing + (let* ((stime (current-seconds))) + (let loop ((last-host #f) + (last-port #f) + (tries 0)) + (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*))) + (curr-port (and *db-serv-info* (servdat-port *db-serv-info*)))) + ;; first we verify port and interface, update *db-serv-info* in need be. + (cond + ((> tries num-tries-allowed) + (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") + (exit 1)) + ((not *db-serv-info*) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not last-host)(not last-port)) + (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not (equal? last-host curr-host)) + (not (equal? last-port curr-port))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed + (thread-sleep! 0.5) + (loop curr-host curr-port (+ tries 1))) + (else + (rmt:get-signature) ;; sets *my-signature* as side effect + (servdat-status-set! *db-serv-info* 'interface-stable) + (debug:print 0 *default-log-port* + "SERVER STARTED: " curr-host + ":" curr-port + " AT " (current-seconds) " server signature: " *my-signature* + " with "(servdat-trynum *db-serv-info*)" port changes") + (flush-output *default-log-port*) + #t)))))) + +;; run rmt:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (rmt:keep-running dbname) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + + (let* ((sinfo *db-serv-info*) + (server-start-time (current-seconds)) + (pkts-dir (get-pkts-dir)) + (server-key (rmt:get-signature)) ;; This servers key + (is-main (equal? (args:get-arg "-db") ".db/main.db")) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (host port) + (set! *unclean-shutdown* #f) ;; Should not be needed anymore + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; (rmt:server-shutdown host port) -- called in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) + (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db")) + ;; main and run db servers have both got wait logic (could/should merge it) + (if is-main + (rmt:wait-for-server pkts-dir dbname server-key) + (rmt:wait-for-stable-interface)) + ;; this is our forever loop + (let* ((iface (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (uconn (servdat-uconn *db-serv-info*))) + (let loop ((count 0) + (bad-sync-count 0) + (start-time (current-milliseconds))) + (if (and (not is-main) + (common:low-noise-print 60 "servdat-status")) + (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*))) + + (mutex-lock! *heartbeat-mutex*) + ;; set up the database handle + (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate + (let ((watchdog (bdat-watchdog *bdat*))) + (debug:print 0 *default-log-port* "SERVER: dbprep") + (db:setup dbname) ;; sets *dbstruct-db* as side effect + (servdat-status-set! *db-serv-info* 'db-opened) + ;; IFF I'm not main, call into main and register self + (if (not is-main) + (let ((res (rmt:register-server sinfo + *toppath* iface port + server-key dbname))) + (if res ;; we are the server + (servdat-status-set! *db-serv-info* 'have-interface-and-db) + ;; now check that the db locker is alive, clear it out if not + (let* ((serv-info (rmt:server-info *toppath* dbname))) + (match serv-info + ((host port servkey pid ipaddr apath dbpath) + (if (not (server-ready? uconn (conc host":"port) servkey)) + (begin + (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") + (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) + (loop (+ count 1) bad-sync-count start-time)))) + (else + (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) + (exit))))))) + (debug:print 0 *default-log-port* + "SERVER: running, db "dbname" opened, megatest version: " + (common:get-full-version)) + ;; start the watchdog + + ;; is this really needed? + + #;(if watchdog + (if (not (member (thread-state watchdog) + '(ready running blocked + sleeping dead))) + (begin + (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")") + (thread-start! watchdog)) + (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")")) + (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it.")) + #;(loop (+ count 1) bad-sync-count start-time) + )) + + (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) + + (mutex-unlock! *heartbeat-mutex*) + + ;; when things go wrong we don't want to be doing the various + ;; queries too often so we strive to run this stuff only every + ;; four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (set! last-access *db-last-access*) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) bad-sync-count (current-milliseconds))) + + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence (get-host-name) port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port)) + ((and *server-run* + (or (not (timed-out?)) + (if is-main ;; do not exit if there are other servers (keep main open until all others gone) + (> (rmt:get-count-servers sinfo *toppath*) 1) + #f))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) + (loop 0 bad-sync-count (current-milliseconds))) + (else + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port) + #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " + (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown + (sexpr->string 'quit)))))))))) + +(define (rmt:get-reasonable-hostname) + (let* ((inhost (or (args:get-arg "-server") "-"))) + (if (equal? inhost "-") + (get-host-name) + inhost))) + +;; Call this to start the actual server +;; +;; all routes though here end in exit ... +;; +;; This is the point at which servers are started +;; +(define (rmt:server-launch dbname) + (debug:print-info 0 *default-log-port* "Entered rmt:server-launch") + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (rmt:run (rmt:get-reasonable-hostname))) + "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (if (args:get-arg "-server") + (rmt:keep-running dbname))) + "Keep running"))) + (thread-start! th2) + (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (thread-join! th3)) + #f) + +;;====================================================================== +;; S E R V E R - D I R E C T C A L L S +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server #f (list run-id))) + +(define (rmt:start-server run-id) + (rmt:send-receive 'start-server #f (list run-id))) + +(define (rmt:server-info apath dbname) + (rmt:send-receive 'get-server-info #f (list apath dbname))) + +;;====================================================================== +;; Nanomsg transport +;;====================================================================== + +#;(define (is-port-in-use port-num) + (let* ((ret #f)) + (let-values (((inp oup pid) + (process "netstat" (list "-tulpn" )))) + (let loop ((inl (read-line inp))) + (if (not (eof-object? inl)) + (begin + (if (string-search (regexp (conc ":" port-num)) inl) + (begin + ;(print "Output: " inl) + (set! ret #t)) + (loop (read-line inp))))))) + ret)) + +#;(define (open-nn-connection host-port) + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port))) + (nng-dial req uri) + (socket-set! req 'nng/recvtimeo 2000) + req)) + +#;(define (send-receive-nn req msg) + (nng-send req msg) + (nng-recv req)) + +#;(define (close-nn-connection req) + (nng-close! req)) + +;; ;; open connection to server, send message, close connection +;; ;; +;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds +;; (let ((req (make-req-socket 'req)) +;; (uri (conc "tcp://" host-port)) +;; (res #f) +;; ;; (contacts (alist-ref 'contact attrib)) +;; ;; (mode (alist-ref 'mode attrib)) +;; ) +;; (socket-set! req 'nng/recvtimeo 2000) +;; (handle-exceptions +;; exn +;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) +;; ;; Send notification +;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) +;; #f) +;; (nng-dial req uri) +;; ;; (print "Connected to the server " ) +;; (nng-send req msg) +;; ;; (print "Request Sent") +;; (let* ((th1 (make-thread (lambda () +;; (let ((resp (nng-recv req))) +;; (nng-close! req) +;; (set! res (if (equal? resp "ok") +;; #t +;; #f)))) +;; "recv thread")) +;; (th2 (make-thread (lambda () +;; (thread-sleep! timeout) +;; (thread-terminate! th1)) +;; "timer thread"))) +;; (thread-start! th1) +;; (thread-start! th2) +;; (thread-join! th1) +;; res)))) +;; +#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port)) + (res #f)) + (handle-exceptions + exn + (let ((emsg ((condition-property-accessor 'exn 'message) exn))) + ;; Send notification + (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn) + #f) + (nng-dial req uri) + (nng-send req msg) + (let* ((th1 (make-thread (lambda () + (let ((resp (nng-recv req))) + (nng-close! req) + ;; (print resp) + (set! res resp))) + "recv thread")) + (th2 (make-thread (lambda () + (thread-sleep! timeout) + (thread-terminate! th1)) + "timer thread"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + res)))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; run ping in separate process, safest way in some cases +;; +#;(define (server:ping-server ifaceport) + (with-input-from-pipe + (conc (common:get-megatest-exe) " -ping " ifaceport) + (lambda () + (let loop ((inl (read-line)) + (res "NOREPLY")) + (if (eof-object? inl) + (case (string->symbol res) + ((NOREPLY) #f) + ((LOGIN_OK) #t) + (else #f)) + (loop (read-line) inl)))))) + +;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; +#;(define (server:login toppath) + (lambda (toppath) + (set! *db-last-access* (current-seconds)) ;; might not be needed. + (if (equal? *toppath* toppath) + #t + #f))) + +;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; (define (server:release-sync-lock) +;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; (define (server:have-sync-lock?) +;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; (have-lock? (car have-lock-pair)) +;; (lock-time (cdr have-lock-pair)) +;; (lock-age (- (current-seconds) lock-time))) +;; (cond +;; (have-lock? #t) +;; ((>lock-age +;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; (server:release-sync-lock) +;; (server:have-sync-lock?)) +;; (else #f)))) + +) ADDED ulex-full/ulex.scm Index: ulex-full/ulex.scm ================================================================== --- /dev/null +++ ulex-full/ulex.scm @@ -0,0 +1,569 @@ +;; ulex: Distributed sqlite3 db +;;; +;; Copyright (C) 2018-2021 Matt Welland +;; Redistribution and use in source and binary forms, with or without +;; modification, is permitted. +;; +;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +;; DAMAGE. + +;;====================================================================== +;; ABOUT: +;; See README in the distribution at https://www.kiatoa.com/fossils/ulex +;; NOTES: +;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. +;; +;;====================================================================== + +(module ulex + * + #;( + + ;; NOTE: looking for the handler proc - find the run-listener :) + + run-listener ;; (run-listener handler-proc [port]) => uconn + + ;; NOTE: handler-proc params; + ;; (handler-proc rem-host-port qrykey cmd params) + + send-receive ;; (send-receive uconn host-port cmd data) + + ;; NOTE: cmd can be any plain text symbol except for these; + ;; 'ping 'ack 'goodbye 'response + + set-work-handler ;; (set-work-handler proc) + + wait-and-close ;; (wait-and-close uconn) + + ulex-listener? + + ;; needed to get the interface:port that was automatically found + udat-port + udat-host-port + + ;; for testing only + ;; pp-uconn + + ;; parameters + work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct + return-method ;; parameter; 'mailbox, 'polling, 'direct + ) + +(import scheme + chicken.base + chicken.file + chicken.io + chicken.time + chicken.condition + chicken.string + chicken.sort + chicken.pretty-print + + address-info + mailbox + matchable + ;; queues + regex + regex-case + simple-exceptions + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + system-information + tcp6 + typed-records + ) + +;; udat struct, used by both caller and callee +;; instantiated as uconn by convention +;; +(defstruct udat + ;; the listener side + (port #f) + (host-port #f) + (socket #f) + ;; the peers + (peers (make-hash-table)) ;; host:port->peer + ;; work handling + (work-queue (make-mailbox)) + (work-proc #f) ;; set by user + (cnum 0) ;; cookie number + (mboxes (make-hash-table)) ;; for the replies + (avail-cmboxes '()) ;; list of ( . ) for re-use + ;; threads + (numthreads 10) + (cmd-thread #f) + (work-queue-thread #f) + (num-threads-running 0) + ) + +;; Parameters + +;; work-method: +(define work-method (make-parameter 'mailbox)) +;; mailbox - all rdat goes through mailbox +;; threads - all rdat immediately executed in new thread +;; direct - no queuing +;; + +;; return-method, return the result to waiting send-receive: +(define return-method (make-parameter 'mailbox)) +;; mailbox - create a mailbox and use it for passing returning results to send-receive +;; polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result +;; direct - no queuing, result is passed back in single tcp connection +;; + +;; ;; struct for keeping track of others we are talking to +;; ;; +;; (defstruct pdat +;; (host-port #f) +;; (conns '()) ;; list of pcon structs, pop one off when calling the peer +;; ) +;; +;; ;; struct for peer connections, keep track of expiration etc. +;; ;; +;; (defstruct pcon +;; (inp #f) +;; (oup #f) +;; (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59) +;; (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes +;; ) + +;;====================================================================== +;; listener +;;====================================================================== + +;; is uconn a ulex connector (listener) +;; +(define (ulex-listener? uconn) + (udat? uconn)) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (setup-listener uconn #!optional (port 4242)) + (handle-exceptions + exn + (if (< port 65535) + (setup-listener uconn (+ port 1)) + #f) + (connect-listener uconn port))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (udat-port-set! uconn port) + (udat-host-port-set! uconn (conc addr":"port)) + (udat-socket-set! uconn tlsn) + uconn)) + +;; run-listener does all the work of starting a listener in a thread +;; it then returns control +;; +(define (run-listener handler-proc #!optional (port-suggestion 4242)) + (let* ((uconn (make-udat))) + (udat-work-proc-set! uconn handler-proc) + (if (setup-listener uconn port-suggestion) + (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop")) + (th2 (make-thread (lambda () + (case (work-method) + ((mailbox limited) + (process-work-queue uconn)))) + "Ulex work queue processor"))) + ;; (tcp-buffer-size 2048) + (thread-start! th1) + (thread-start! th2) + (udat-cmd-thread-set! uconn th1) + (udat-work-queue-thread-set! uconn th2) + (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".") + uconn) + (assert #f "ERROR: run-listener called without proper setup.")))) + +(define (wait-and-close uconn) + (thread-join! (udat-cmd-thread uconn)) + (tcp-close (udat-socket uconn))) + +;;====================================================================== +;; peers and connections +;;====================================================================== + +(define *send-mutex* (make-mutex)) + +;; send structured data to recipient +;; +;; NOTE: qrykey is what was called the "cookie" previously +;; +;; retval tells send to expect and wait for return data (one line) and return it or time out +;; this is for ping where we don't want to necessarily have set up our own server yet. +;; +;; NOTE: see below for beginnings of code to allow re-use of tcp connections +;; - I believe (without substantial evidence) that re-using connections will +;; be beneficial ... +;; +(define (send udata host-port qrykey cmd params) + (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this + (isme #f #;(equal? host-port my-host-port)) ;; calling myself? + ;; dat is a self-contained work block that can be sent or handled locally + (dat (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds))))) + (cond + (isme (ulex-handler udata dat)) ;; no transmission needed + (else + (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? + exn + (message exn) + (begin + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP + (let-values (((inp oup)(tcp-connect host-port))) + (let ((res (if (and inp oup) + (begin + (serialize dat oup) + (close-output-port oup) + (deserialize inp) + ) + (begin + (print "ERROR: send called but no receiver has been setup. Please call setup first!") + #f)))) + (close-input-port inp) + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + res)))))))) ;; res will always be 'ack unless return-method is direct + +(define (send-via-polling uconn host-port cmd data) + (let* ((qrykey (make-cookie uconn)) + (sres (send uconn host-port qrykey cmd data))) + (case sres + ((ack) + (let loop ((start-time (current-milliseconds))) + (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout + (begin + (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data) + #f) + (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash + (if result ;; result is '(status . result-data) or #f for nothing yet + (begin + (hash-table-delete! (udat-mboxes uconn) qrykey) + (cdr result)) + (begin + (thread-sleep! 0.01) + (loop start-time))))))) + (else + (print "ULEX ERROR: Communication failed? sres="sres) + #f)))) + +(define (send-via-mailbox uconn host-port cmd data) + (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? + (qrykey (car cmbox)) + (mbox (cdr cmbox)) + (mbox-time (current-milliseconds)) + (sres (send uconn host-port qrykey cmd data))) ;; short res + (if (eq? sres 'ack) + (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread))) + #f + 120)) ;; timeout) + (mbox-timeout-result 'MBOX_TIMEOUT) + (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) + (mbox-receive-time (current-milliseconds))) + ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? + (hash-table-delete! (udat-mboxes uconn) qrykey) + (if (eq? res 'MBOX_TIMEOUT) + (begin + (print "WARNING: mbox timed out for query "cmd", with data "data + ", waiting for response from "host-port".") + + ;; here it might make sense to clean up connection records and force clean start? + ;; NO. The progam using ulex needs to do the reset. Right thing here is exception + + #f) ;; convert to raising exception? + res)) + (begin + (print "ERROR: Communication failed? Got "sres) + #f)))) + +;; send a request to the given host-port and register a mailbox in udata +;; wait for the mailbox data and return it +;; +(define (send-receive uconn host-port cmd data) + (let* ((start-time (current-milliseconds)) + (result (cond + ((member cmd '(ping goodbye)) ;; these are immediate + (send uconn host-port 'ping cmd data)) + ((eq? (work-method) 'direct) + ;; the result from send will be the actual result, not an 'ack + (send uconn host-port 'direct cmd data)) + (else + (case (return-method) + ((polling) + (send-via-polling uconn host-port cmd data)) + ((mailbox) + (send-via-mailbox uconn host-port cmd data)) + (else + (print "ULEX ERROR: unrecognised return-method "(return-method)".") + #f))))) + (duration (- (current-milliseconds) start-time))) + ;; this is ONLY for development and debugging. It will be removed once Ulex is stable. + (if (< 5000 duration) + (print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000))) + " seconds; "cmd", host-port="host-port", data="data)) + result)) + + +;;====================================================================== +;; responder side +;;====================================================================== + +;; take a request, rdat, and if not immediate put it in the work queue +;; +;; Reserved cmds; ack ping goodbye response +;; +(define (ulex-handler uconn rdat) + (assert (list? rdat) "FATAL: ulex-handler give rdat as not list") + (match rdat ;; (string-split controldat) + ((rem-host-port qrykey cmd params);; timedata) + ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params) + (case cmd + ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack) + ((ping) + ;; (print "Got Ping!") + ;; (add-to-work-queue uconn rdat) + 'ack) + ((goodbye) + ;; just clear out references to the caller. NOT COMPLETE + (add-to-work-queue uconn rdat) + 'ack) + ((response) ;; this is a result from remote processing, send it as mail ... + (case (return-method) + ((polling) + (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params)) + 'ack) + ((mailbox) + (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) + (if mbox + (begin + (mailbox-send! mbox params) ;; params here is our result + 'ack) + (begin + (print "ERROR: received result but no associated mbox for cookie "qrykey) + 'no-mbox-found)))) + (else (print "ULEX ERROR: unrecognised return-method "(return-method)) + 'bad-return-method))) + (else ;; generic request - hand it to the work queue + (add-to-work-queue uconn rdat) + 'ack))) + (else + (print "ULEX ERROR: bad rdat "rdat) + 'bad-rdat))) + +;; given an already set up uconn start the cmd-loop +;; +(define (ulex-cmd-loop uconn) + (let* ((serv-listener (udat-socket uconn)) + (listener (lambda () + (let loop ((state 'start)) + (let-values (((inp oup)(tcp-accept serv-listener))) + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP + (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params) + (resp (ulex-handler uconn rdat))) + (serialize resp oup) + (close-input-port inp) + (close-output-port oup) + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + ) + (loop state)))))) + ;; start N of them + (let loop ((thnum 0) + (threads '())) + (if (< thnum 100) + (let* ((th (make-thread listener (conc "listener" thnum)))) + (thread-start! th) + (loop (+ thnum 1) + (cons th threads))) + (map thread-join! threads))))) + +;; add a proc to the cmd list, these are done symetrically (i.e. in all instances) +;; so that the proc can be dereferenced remotely +;; +(define (set-work-handler uconn proc) + (udat-work-proc-set! uconn proc)) + +;;====================================================================== +;; work queues - this is all happening on the listener side +;;====================================================================== + +;; rdat is (rem-host-port qrykey cmd params) + +(define (add-to-work-queue uconn rdat) + #;(queue-add! (udat-work-queue uconn) rdat) + (case (work-method) + ((threads) + (thread-start! (make-thread (lambda () + (do-work uconn rdat)) + "worker thread"))) + ((mailbox) + (mailbox-send! (udat-work-queue uconn) rdat)) + ((direct) + (do-work uconn rdat)) + (else + (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.") + (mailbox-send! (udat-work-queue uconn) rdat)))) + +;; move the logic to return the result somewhere else? +;; +(define (do-work uconn rdat) + (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change + ;; put this following into a do-work procedure + (match rdat + ((rem-host-port qrykey cmd params) + (let* ((start-time (current-milliseconds)) + (result (proc rem-host-port qrykey cmd params)) + (end-time (current-milliseconds)) + (run-time (- end-time start-time))) + (case (work-method) + ((direct) result) + (else + (print "ULEX: work "cmd", "params" done in "run-time" ms") + ;; send 'response as cmd and result as params + (send uconn rem-host-port qrykey 'response result) ;; could check for ack + (print "ULEX: response sent back to "rem-host-port" in "(- (current-milliseconds) end-time)))))) + (MBOX_TIMEOUT 'do-work-timeout) + (else + (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params"))))) + +;; NEW APPROACH: +;; +(define (process-work-queue uconn) + (let ((wqueue (udat-work-queue uconn)) + (proc (udat-work-proc uconn)) + (numthr (udat-numthreads uconn))) + (let loop ((thnum 1) + (threads '())) + (let ((thlst (cons (make-thread (lambda () + (let work-loop () + (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT))) + (do-work uconn rdat)) + (work-loop))) + (conc "work thread " thnum)) + threads))) + (if (< thnum numthr) + (loop (+ thnum 1) + thlst) + (begin + (print "ULEX: Starting "(length thlst)" worker threads.") + (map thread-start! thlst) + (print "ULEX: Threads started. Joining all.") + (map thread-join! thlst))))))) + +;; below was to enable re-use of connections. This seems non-trivial so for +;; now lets open on each call +;; +;; ;; given host-port get or create peer struct +;; ;; +;; (define (udat-get-peer uconn host-port) +;; (or (hash-table-ref/default (udat-peers uconn) host-port #f) +;; ;; no peer, so create pdat and init it +;; +;; ;; NEED stack of connections, pop and use; inp, oup, +;; ;; creation_time (remove and create new if over 24hrs old +;; ;; +;; (let ((pdat (make-pdat host-port: host-port))) +;; (hash-table-set! (udat-peers uconn) host-port pdat) +;; pdat))) +;; +;; ;; is pcon alive +;; +;; ;; given host-port and pdat get a pcon +;; ;; +;; (define (pdat-get-pcon pdat host-port) +;; (let loop ((conns (pdat-conns pdat))) +;; (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later +;; (init-pcon (make-pcon)) +;; (let* ((conn (pop conns))) +;; +;; ;; given host-port get a pcon struct +;; ;; +;; (define (udat-get-pcon + +;;====================================================================== +;; misc utils +;;====================================================================== + +(define (make-cookie uconn) + (let ((newcnum (+ (udat-cnum uconn) 1))) + (udat-cnum-set! uconn newcnum) + (conc (udat-host-port uconn) ":" + newcnum))) + +;; cookie/mboxes + +;; we store each mbox with a cookie ( . ) +;; +(define (get-cmbox uconn) + (if (null? (udat-avail-cmboxes uconn)) + (let ((cookie (make-cookie uconn)) + (mbox (make-mailbox))) + (hash-table-set! (udat-mboxes uconn) cookie mbox) + `(,cookie . ,mbox)) + (let ((cmbox (car (udat-avail-cmboxes uconn)))) + (udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn))) + cmbox))) + +(define (put-cmbox uconn cmbox) + (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn)))) + +(define (pp-uconn uconn) + (pp (udat->alist uconn))) + + +;;====================================================================== +;; network utilities +;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + +(define (get-my-best-address) + (let ((all-my-addresses (get-all-ips))) + (cond + ((null? all-my-addresses) + (get-host-name)) ;; no interfaces? + ((eq? (length all-my-addresses) 1) + (car all-my-addresses)) ;; only one to choose from, just go with it + (else + (car (sort all-my-addresses ip-pref-less?)))))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips) + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +) ADDED ulex-none/dbmgr.scm Index: ulex-none/dbmgr.scm ================================================================== --- /dev/null +++ ulex-none/dbmgr.scm @@ -0,0 +1,1123 @@ +;;====================================================================== +;; Copyright 2022, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbmgrmod)) + +(declare (uses ulex)) +(declare (uses apimod)) +(declare (uses pkts)) +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses portloggermod)) +(declare (uses debugprint)) + +(module dbmgrmod + * + +(import scheme + chicken.base + chicken.condition + chicken.file + chicken.format + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + + (prefix sqlite3 sqlite3:) + matchable + md5 + message-digest + regex + s11n + srfi-1 + srfi-18 + srfi-69 + system-information + typed-records + + pkts + ulex + + commonmod + apimod + dbmod + debugprint + (prefix mtargs args:) + portloggermod + ) + +;; ;; Configurations for server +;; ;; (tcp-buffer-size 2048) +;; ;; (max-connections 2048) +;; +;; ;; info about me as a listener and my connections to db servers +;; ;; stored (for now) in *db-serv-info* +;; ;; +;; (defstruct servdat +;; (host #f) +;; (port #f) +;; (uuid #f) +;; (dbfile #f) +;; (uconn #f) ;; this is the listener *FOR THIS PROCESS* +;; (mode #f) +;; (status 'starting) +;; (trynum 0) ;; count the number of ports we've tried +;; (conns (make-hash-table)) ;; apath/dbname => conndat +;; ) +;; +;; (define *db-serv-info* (make-servdat)) +;; +;; (define (servdat->url sdat) +;; (conc (servdat-host sdat)":"(servdat-port sdat))) +;; +;; ;; db servers contact info +;; ;; +;; (defstruct conndat +;; (apath #f) +;; (dbname #f) +;; (fullname #f) +;; (hostport #f) +;; (ipaddr #f) +;; (port #f) +;; (srvpkt #f) +;; (srvkey #f) +;; (lastmsg 0) +;; (expires 0)) +;; +;; (define *srvpktspec* +;; `((server (host . h) +;; (port . p) +;; (servkey . k) +;; (pid . i) +;; (ipaddr . a) +;; (dbpath . d)))) +;; +;; ;;====================================================================== +;; ;; S U P P O R T F U N C T I O N S +;; ;;====================================================================== +;; +;; ;; set up the api proc, seems like there should be a better place for this? +;; ;; +;; ;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE +;; ;; +;; ;; (define api-proc (make-parameter conc)) +;; ;; (api-proc api:execute-requests) +;; +;; ;; do we have a connection to apath dbname and +;; ;; is it not expired? then return it +;; ;; +;; ;; else setup a connection +;; ;; +;; ;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception +;; ;; +;; (define (rmt:get-conn remdat apath dbname) +;; (let* ((fullname (db:dbname->path apath dbname))) +;; (hash-table-ref/default (servdat-conns remdat) fullname #f))) +;; +;; (define (rmt:drop-conn remdat apath dbname) +;; (let* ((fullname (db:dbname->path apath dbname))) +;; (hash-table-delete! (servdat-conns remdat) fullname))) +;; +;; (define (rmt:find-main-server uconn apath dbname) +;; (let* ((pktsdir (get-pkts-dir apath)) +;; (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) +;; (viable-srvs (get-viable-servers all-srvpkts dbname))) +;; (get-the-server uconn apath viable-srvs))) +;; +;; +;; (define *connstart-mutex* (make-mutex)) +;; (define *last-main-start* 0) +;; +;; ;; looks for a connection to main, returns if have and not exired +;; ;; creates new otherwise +;; ;; +;; ;; connections for other servers happens by requesting from main +;; ;; +;; ;; TODO: This is unnecessarily re-creating the record in the hash table +;; ;; +;; (define (rmt:open-main-connection remdat apath) +;; (let* ((fullpath (db:dbname->path apath ".db/main.db")) +;; (conns (servdat-conns remdat)) +;; (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this +;; (start-rmt:run (lambda () +;; (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server"))) +;; (thread-start! th1) +;; (thread-sleep! 1) +;; (let loop ((count 0)) +;; (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection") +;; (if (or (not *db-serv-info*) +;; (not (servdat-uconn *db-serv-info*))) +;; (begin +;; (thread-sleep! 1) +;; (loop (+ count 1))) +;; (begin +;; (servdat-mode-set! *db-serv-info* 'non-db) +;; (servdat-uconn *db-serv-info*))))))) +;; (myconn (servdat-uconn *db-serv-info*))) +;; (cond +;; ((not myconn) +;; (start-rmt:run) +;; (rmt:open-main-connection remdat apath)) +;; ((and conn ;; conn is NOT a socket, just saying ... +;; (< (current-seconds) (conndat-expires conn))) +;; #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died +;; ((and conn +;; (>= (current-seconds)(conndat-expires conn))) +;; (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") +;; (rmt:drop-conn remdat apath ".db/main.db") ;; +;; (rmt:open-main-connection remdat apath)) +;; (else +;; ;; Below we will find or create and connect to main +;; (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch") +;; (let* ((dbname (db:run-id->dbname #f)) +;; (the-srv (rmt:find-main-server myconn apath dbname)) +;; (start-main-srv (lambda () ;; call IF there is no the-srv found +;; (mutex-lock! *connstart-mutex*) +;; (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server +;; (begin +;; (api:run-server-process apath dbname) +;; (set! *last-main-start* (current-seconds)) +;; (thread-sleep! 1)) +;; (thread-sleep! 0.25)) +;; (mutex-unlock! *connstart-mutex*) +;; (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries +;; ))) +;; (if (not the-srv) ;; have server, try connecting to it +;; (start-main-srv) +;; (let* ((srv-addr (server-address the-srv)) ;; need serv +;; (ipaddr (alist-ref 'ipaddr the-srv)) +;; (port (alist-ref 'port the-srv)) +;; (srvkey (alist-ref 'servkey the-srv)) +;; (fullpath (db:dbname->path apath dbname)) +;; +;; (new-the-srv (make-conndat +;; apath: apath +;; dbname: dbname +;; fullname: fullpath +;; hostport: srv-addr +;; ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection? +;; ipaddr: ipaddr +;; port: port +;; srvpkt: the-srv +;; srvkey: srvkey ;; generated by rmt:get-signature on the server side +;; lastmsg: (current-seconds) +;; expires: (+ (current-seconds) +;; (server:expiration-timeout) +;; -2) ;; this needs to be gathered during the ping +;; ))) +;; (hash-table-set! conns fullpath new-the-srv))) +;; #t))))) +;; +;; ;; NB// sinfo is a servdat struct +;; ;; +;; (define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5)) +;; (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") +;; (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable +;; (fullname (db:dbname->path apath dbname)) +;; (conns (servdat-conns sinfo)) +;; (mconn (rmt:get-conn sinfo apath ".db/main.db")) +;; (dconn (rmt:get-conn sinfo apath dbname))) +;; #;(if (and mconn +;; (not (debug:print-logger))) +;; (begin +;; (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") +;; (debug:print-logger rmt:log-to-main))) +;; (cond +;; ((and mconn +;; dconn +;; (< (current-seconds)(conndat-expires dconn))) +;; #t) ;; good to go +;; ((not mconn) ;; no channel open to main? open it... +;; (rmt:open-main-connection sinfo apath) +;; (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) +;; ((not dconn) ;; no channel open to dbname? +;; (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname)))) +;; (case res +;; ((server-started) +;; (if (> num-tries 0) +;; (begin +;; (thread-sleep! 2) +;; (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) +;; (begin +;; (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) +;; (exit 1)))) +;; (else +;; (if (list? res) ;; server has been registered and the info was returned. pass it on. +;; (begin ;; ("192.168.0.9" 53817 +;; ;; "5e34239f48e8973b3813221e54701a01" "24310" +;; ;; "192.168.0.9" +;; ;; "/home/matt/data/megatest/tests/simplerun" +;; ;; ".db/1.db") +;; (match +;; res +;; ((host port servkey pid ipaddr apath dbname) +;; (debug:print-info 0 *default-log-port* "got "res) +;; (hash-table-set! conns +;; fullname +;; (make-conndat +;; apath: apath +;; dbname: dbname +;; hostport: (conc host":"port) +;; ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection? +;; ipaddr: ipaddr +;; port: port +;; srvkey: servkey +;; lastmsg: (current-seconds) +;; expires: (+ (current-seconds) +;; (server:expiration-timeout) +;; -2)))) +;; (else +;; (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) +;; res) +;; (begin +;; (debug:print-info 0 *default-log-port* "Unexpected result: " res) +;; res))))))) +;; #t)) +;; +;; ;;====================================================================== +;; +;; ;; FOR DEBUGGING SET TO #t +;; ;; (define *localmode* #t) +;; (define *localmode* #f) +(define *dbstruct* (make-dbr:dbstruct)) + +;; Defaults to current area +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (dbname (db:run-id->dbname rid))) + (api:execute-requests *dbstruct* cmd params))) + +;; ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; ;; sometime in the future +;; ;; +;; (define (rmt:send-receive-real sinfo apath dbname cmd params) +;; (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.") +;; (let* ((cdat (rmt:get-conn sinfo apath dbname))) +;; (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") +;; (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex +;; ;; then send-receive using the ulex layer to host-port stored in cdat +;; (res (send-receive uconn (conndat-hostport cdat) cmd params)) +;; #;(th1 (make-thread (lambda () +;; (set! res (send-receive uconn (conndat-hostport cdat) cmd params))) +;; "send-receive thread"))) +;; ;; (thread-start! th1) +;; ;; (thread-join! th1) ;; gratuitious thread stuff is so that mailbox is not used in primordial thead +;; ;; since we accessed the server we can bump the expires time up +;; (conndat-expires-set! cdat (+ (current-seconds) +;; (server:expiration-timeout) +;; -2)) ;; two second margin for network time misalignments etc. +;; res))) +;; +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future. +;; +;; Purpose - call the main.db server and request a server be started +;; for the given area path and dbname +;; + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================") + (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +;; ;; host and port are used to ensure we are remove proper records +;; (define (rmt:server-shutdown host port) +;; (let ((dbfile (servdat-dbfile *db-serv-info*))) +;; (debug:print-info 0 *default-log-port* "dbfile is "dbfile) +;; (if dbfile +;; (let* ((am-server (args:get-arg "-server")) +;; (dbfile (args:get-arg "-db")) +;; (apath *toppath*) +;; #;(sinfo *remotedat*)) ;; foundation for future fix +;; (if *dbstruct-db* +;; (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) +;; (db (dbr:dbdat-db dbdat)) +;; (inmem (dbr:dbdat-db dbdat)) ;; WRONG +;; ) +;; ;; do a final sync here +;; (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) +;; (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) +;; ;; let's finalize here +;; (debug:print-info 0 *default-log-port* "Finalizing db and inmem") +;; (if (sqlite3:database? db) +;; (sqlite3:finalize! db) +;; (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) +;; (if (sqlite3:database? inmem) +;; (sqlite3:finalize! inmem) +;; (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) +;; (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) +;; (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) +;; (if (not am-server) +;; (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") +;; (if (string-match ".*/main.db$" dbfile) +;; (let ((pkt-file (conc (get-pkts-dir *toppath*) +;; "/" (servdat-uuid *db-serv-info*) +;; ".pkt"))) +;; (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) +;; (delete-file* pkt-file) +;; (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) +;; (db:with-lock-db +;; (servdat-dbfile *db-serv-info*) +;; (lambda (dbh dbfile) +;; (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove +;; (let* ((sdat *db-serv-info*) ;; we have a run-id server +;; (host (servdat-host sdat)) +;; (port (servdat-port sdat)) +;; (uuid (servdat-uuid sdat)) +;; (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile))) +;; (debug:print-info 0 *default-log-port* "deregistered-server, res="res) +;; (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) +;; ))))))) +;; +;; +;; (define (common:run-sync?) +;; ;; (and (common:on-homehost?) +;; (args:get-arg "-server")) +;; +;; (define *rmt:run-mutex* (make-mutex)) +;; (define *rmt:run-flag* #f) +;; +;; ;; Main entry point to start a server. was start-server +;; (define (rmt:run hostn) +;; (mutex-lock! *rmt:run-mutex*) +;; (if *rmt:run-flag* +;; (begin +;; (debug:print-warn 0 *default-log-port* "rmt:run already running.") +;; (mutex-unlock! *rmt:run-mutex*)) +;; (begin +;; (set! *rmt:run-flag* #t) +;; (mutex-unlock! *rmt:run-mutex*) +;; ;; ;; Configurations for server +;; ;; (tcp-buffer-size 2048) +;; ;; (max-connections 2048) +;; (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") +;; (if (and *db-serv-info* +;; (servdat-uconn *db-serv-info*)) +;; (let* ((uconn (servdat-uconn *db-serv-info*))) +;; (wait-and-close uconn)) +;; (let* ((port (portlogger:open-run-close portlogger:find-port)) +;; (handler-proc (lambda (rem-host-port qrykey cmd params) ;; +;; (set! *db-last-access* (current-seconds)) +;; (assert (list? params) "FATAL: handler called with non-list params") +;; (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params) +;; (debug:print 0 *default-log-port* "handler call: "cmd", params="params) +;; (api:execute-requests *dbstruct-db* cmd params)))) +;; ;; (api:process-request *dbstuct-db* +;; (if (not *db-serv-info*) +;; (set! *db-serv-info* (make-servdat host: hostn port: port))) +;; (let* ((uconn (run-listener handler-proc port)) +;; (rport (udat-port uconn))) ;; the real port +;; (servdat-host-set! *db-serv-info* hostn) +;; (servdat-port-set! *db-serv-info* rport) +;; (servdat-uconn-set! *db-serv-info* uconn) +;; (wait-and-close uconn) +;; (db:print-current-query-stats) +;; ))) +;; (let* ((host (servdat-host *db-serv-info*)) +;; (port (servdat-port *db-serv-info*)) +;; (mode (or (servdat-mode *db-serv-info*) +;; "non-db"))) +;; ;; server exit stuff here +;; ;; (rmt:server-shutdown host port) - always do in on-exit +;; ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit +;; (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting") +;; )))) +;; +;; ;;====================================================================== +;; ;; S E R V E R U T I L I T I E S +;; ;;====================================================================== +;; +;; +;; ;;====================================================================== +;; ;; NEW SERVER METHOD +;; ;;====================================================================== +;; +;; ;; only use for main.db - need to re-write some of this :( +;; ;; +;; (define (get-lock-db sdat dbfile host port) +;; (assert host "FATAL: get-lock-db called with host not set.") +;; (assert port "FATAL: get-lock-db called with port not set.") +;; (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations +;; (res (db:get-iam-server-lock dbh dbfile host port)) +;; (uconn (servdat-uconn sdat))) +;; ;; res => list then already locked, check server is responsive +;; ;; => #t then sucessfully got the lock +;; ;; => #f reserved for future use as to indicate something went wrong +;; (match res +;; ((owner_pid owner_host owner_port event_time) +;; (if (server-ready? uconn (conc owner_host":"owner_port) "abc") +;; #f ;; locked by someone else +;; (begin ;; locked by someone dead and gone +;; (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") +;; (db:steal-lock-db dbh dbfile port)))) +;; (#t #t) ;; placeholder so that we don't touch res if it is #t +;; (else (set! res #f))) +;; (sqlite3:finalize! dbh) +;; res)) +;; +;; +;; (define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) +;; (let* ((pkt-dat `((host . ,host) +;; (port . ,port) +;; (servkey . ,servkey) +;; (pid . ,(current-process-id)) +;; (ipaddr . ,ipaddr) +;; (dbpath . ,dbpath))) +;; (uuid (write-alist->pkt +;; pkts-dir +;; pkt-dat +;; pktspec: pkt-spec +;; ptype: 'server))) +;; (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) +;; uuid)) +;; +;; (define (get-pkts-dir #!optional (apath #f)) +;; (let* ((effective-toppath (or *toppath* apath))) +;; (assert effective-toppath +;; "ERROR: get-pkts-dir called without *toppath* set. Exiting.") +;; (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) +;; (if (file-exists? pdir) +;; pdir +;; (begin +;; (handle-exceptions ;; this exception handler should NOT be needed but ... +;; exn +;; pdir +;; (create-directory pdir #t)) +;; pdir))))) +;; +;; ;; given a pkts dir read +;; ;; +;; (define (get-all-server-pkts pktsdir-in pktspec) +;; (let* ((pktsdir (if (file-exists? pktsdir-in) +;; pktsdir-in +;; (begin +;; (create-directory pktsdir-in #t) +;; pktsdir-in))) +;; (all-pkt-files (glob (conc pktsdir "/*.pkt")))) +;; (map (lambda (pkt-file) +;; (read-pkt->alist pkt-file pktspec: pktspec)) +;; all-pkt-files))) +;; +;; (define (server-address srv-pkt) +;; (conc (alist-ref 'host srv-pkt) ":" +;; (alist-ref 'port srv-pkt))) + +(define (server-ready? uconn host-port key) ;; server-address is host:port + #;(let* ((params `((cmd . ping)(key . ,key))) + (data `((cmd . ping) + (key . ,key) + (params . ,params))) ;; I don't get it. + (res (send-receive uconn host-port 'ping data))) + (if (eq? res 'ack) ;; yep, likely it is who we want on the other end + res + #f)) + #t) + +;; ; from the pkts return servers associated with dbpath +;; ;; NOTE: Only one can be alive - have to check on each +;; ;; in the list of pkts returned +;; ;; +;; (define (get-viable-servers serv-pkts dbpath) +;; (let loop ((tail serv-pkts) +;; (res '())) +;; (if (null? tail) +;; res ;; NOTE: sort by age so oldest is considered first +;; (let* ((spkt (car tail))) +;; (loop (cdr tail) +;; (if (equal? dbpath (alist-ref 'dbpath spkt)) +;; (cons spkt res) +;; res)))))) +;; +;; (define (remove-pkts-if-not-alive uconn serv-pkts) +;; (filter (lambda (pkt) +;; (let* ((host (alist-ref 'host pkt)) +;; (port (alist-ref 'port pkt)) +;; (host-port (conc host":"port)) +;; (key (alist-ref 'servkey pkt)) +;; (pktz (alist-ref 'Z pkt)) +;; (res (server-ready? uconn host-port key))) +;; (if res +;; res +;; (let* ((pktsdir (get-pkts-dir *toppath*)) +;; (pktpath (conc pktsdir"/"pktz".pkt"))) +;; (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) +;; (delete-file* pktpath) +;; #f)))) +;; serv-pkts)) +;; +;; ;; from viable servers get one that is alive and ready +;; ;; +;; (define (get-the-server uconn apath serv-pkts) +;; (let loop ((tail serv-pkts)) +;; (if (null? tail) +;; #f +;; (let* ((spkt (car tail)) +;; (host (alist-ref 'ipaddr spkt)) +;; (port (alist-ref 'port spkt)) +;; (host-port (conc host":"port)) +;; (dbpth (alist-ref 'dbpath spkt)) +;; (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt)) +;; (addr (server-address spkt))) +;; (if (server-ready? uconn host-port srvkey) +;; spkt +;; (loop (cdr tail))))))) +;; +;; ;; am I the "first" in line server? I.e. my D card is smallest +;; ;; use Z card as tie breaker +;; ;; +;; (define (get-best-candidate serv-pkts dbpath) +;; (if (null? serv-pkts) +;; #f +;; (let loop ((tail serv-pkts) +;; (best (car serv-pkts))) +;; (if (null? tail) +;; best +;; (let* ((candidate (car tail)) +;; (candidate-bd (string->number (alist-ref 'D candidate))) +;; (best-bd (string->number (alist-ref 'D best))) +;; ;; bigger number is younger +;; (candidate-z (alist-ref 'Z candidate)) +;; (best-z (alist-ref 'Z best)) +;; (new-best (cond +;; ((> best-bd candidate-bd) ;; best is younger than candidate +;; candidate) +;; ((< best-bd candidate-bd) ;; candidate is younger than best +;; best) +;; (else +;; (if (string>=? best-z candidate-z) +;; best +;; candidate))))) ;; use Z card as tie breaker +;; (if (null? tail) +;; new-best +;; (loop (cdr tail) new-best))))))) +;; +;; +;; ;;====================================================================== +;; ;; END NEW SERVER METHOD +;; ;;====================================================================== +;; +;; ;; if .db/main.db check the pkts +;; ;; +;; (define (rmt:wait-for-server pkts-dir db-file server-key) +;; (let* ((sdat *db-serv-info*)) +;; (let loop ((start-time (current-seconds)) +;; (changed #t) +;; (last-sdat "not this")) +;; (begin ;; let ((sdat #f)) +;; (thread-sleep! 0.01) +;; (debug:print-info 0 *default-log-port* "Waiting for server alive signature") +;; (mutex-lock! *heartbeat-mutex*) +;; (set! sdat *db-serv-info*) +;; (mutex-unlock! *heartbeat-mutex*) +;; (if (and sdat +;; (not changed) +;; (> (- (current-seconds) start-time) 2)) +;; (let* ((uconn (servdat-uconn sdat))) +;; (servdat-status-set! sdat 'iface-stable) +;; (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server") +;; ;; create a server pkt in *toppath*/.meta/srvpkts +;; +;; ;; TODO: +;; ;; 1. change sdat to stuct +;; ;; 2. add uuid to struct +;; ;; 3. update uuid in sdat here +;; ;; +;; (servdat-uuid-set! sdat +;; (register-server +;; pkts-dir *srvpktspec* +;; (get-host-name) +;; (servdat-port sdat) server-key +;; (servdat-host sdat) db-file)) +;; ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key +;; ;; now read pkts and see if we are a contender +;; (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) +;; (viables (get-viable-servers all-pkts db-file)) +;; (alive (remove-pkts-if-not-alive uconn viables)) +;; (best-srv (get-best-candidate alive db-file)) +;; (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) +;; (i-am-srv (equal? best-srv-key server-key)) +;; (delete-pkt (lambda () +;; (let* ((pktfile (conc (get-pkts-dir *toppath*) +;; "/" (servdat-uuid *db-serv-info*) +;; ".pkt"))) +;; (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) +;; (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit +;; (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) +;; ;; am I the best-srv, compare server-keys to know +;; (if i-am-srv +;; (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) +;; (begin +;; (debug:print-info 0 *default-log-port* "I'm the server!") +;; (servdat-dbfile-set! sdat db-file) +;; (servdat-status-set! sdat 'db-locked)) +;; (begin +;; (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") +;; (bdat-time-to-exit-set! *bdat* #t) +;; (delete-pkt) +;; (thread-sleep! 0.2) +;; (exit))) +;; (begin +;; (debug:print-info 0 *default-log-port* +;; "Keys do not match "best-srv-key", "server-key", exiting.") +;; (bdat-time-to-exit-set! *bdat* #t) +;; (delete-pkt) +;; (thread-sleep! 0.2) +;; (exit))) +;; sdat)) +;; (begin ;; sdat not yet contains server info +;; (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) +;; (sleep 4) +;; (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes +;; (begin +;; (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") +;; (exit)) +;; (loop start-time +;; (equal? sdat last-sdat) +;; sdat)))))))) +;; +;; (define (rmt:register-server sinfo apath iface port server-key dbname) +;; (servdat-conns sinfo) ;; just checking types +;; (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db +;; (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath +;; (db:run-id->dbname #f) +;; 'register-server `(,iface +;; ,port +;; ,server-key +;; ,(current-process-id) +;; ,iface +;; ,apath +;; ,dbname))) +;; +;; (define (rmt:get-count-servers sinfo apath) +;; (servdat-conns sinfo) ;; just checking types +;; (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db +;; (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath +;; (db:run-id->dbname #f) +;; 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) + +;; (define (rmt:deregister-server db-serv-info apath iface port server-key dbname) +;; (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db +;; (rmt:send-receive-real db-serv-info apath ;; params: host port servkey pid ipaddr dbpath +;; (db:run-id->dbname #f) +;; 'deregister-server `(,iface +;; ,port +;; ,server-key +;; ,(current-process-id) +;; ,iface +;; ,apath +;; ,dbname))) +;; +;; (define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) +;; ;; wait until *db-serv-info* stops changing +;; (let* ((stime (current-seconds))) +;; (let loop ((last-host #f) +;; (last-port #f) +;; (tries 0)) +;; (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*))) +;; (curr-port (and *db-serv-info* (servdat-port *db-serv-info*)))) +;; ;; first we verify port and interface, update *db-serv-info* in need be. +;; (cond +;; ((> tries num-tries-allowed) +;; (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") +;; (exit 1)) +;; ((not *db-serv-info*) +;; (thread-sleep! 0.25) +;; (loop curr-host curr-port (+ tries 1))) +;; ((or (not last-host)(not last-port)) +;; (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) +;; (thread-sleep! 0.25) +;; (loop curr-host curr-port (+ tries 1))) +;; ((or (not (equal? last-host curr-host)) +;; (not (equal? last-port curr-port))) +;; (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") +;; (thread-sleep! 0.25) +;; (loop curr-host curr-port (+ tries 1))) +;; ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed +;; (thread-sleep! 0.5) +;; (loop curr-host curr-port (+ tries 1))) +;; (else +;; (rmt:get-signature) ;; sets *my-signature* as side effect +;; (servdat-status-set! *db-serv-info* 'interface-stable) +;; (debug:print 0 *default-log-port* +;; "SERVER STARTED: " curr-host +;; ":" curr-port +;; " AT " (current-seconds) " server signature: " *my-signature* +;; " with "(servdat-trynum *db-serv-info*)" port changes") +;; (flush-output *default-log-port*) +;; #t)))))) +;; +;; ;; run rmt:keep-running in a parallel thread to monitor that the db is being +;; ;; used and to shutdown after sometime if it is not. +;; ;; +;; (define (rmt:keep-running dbname) +;; ;; if none running or if > 20 seconds since +;; ;; server last used then start shutdown +;; ;; This thread waits for the server to come alive +;; (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") +;; +;; (let* ((sinfo *db-serv-info*) +;; (server-start-time (current-seconds)) +;; (pkts-dir (get-pkts-dir)) +;; (server-key (rmt:get-signature)) ;; This servers key +;; (is-main (equal? (args:get-arg "-db") ".db/main.db")) +;; (last-access 0) +;; (server-timeout (server:expiration-timeout)) +;; (shutdown-server-sequence (lambda (host port) +;; (set! *unclean-shutdown* #f) ;; Should not be needed anymore +;; (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) +;; ;; (rmt:server-shutdown host port) -- called in on-exit +;; ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit +;; (exit))) +;; (timed-out? (lambda () +;; (<= (+ last-access server-timeout) +;; (current-seconds))))) +;; (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db")) +;; ;; main and run db servers have both got wait logic (could/should merge it) +;; (if is-main +;; (rmt:wait-for-server pkts-dir dbname server-key) +;; (rmt:wait-for-stable-interface)) +;; ;; this is our forever loop +;; (let* ((iface (servdat-host *db-serv-info*)) +;; (port (servdat-port *db-serv-info*)) +;; (uconn (servdat-uconn *db-serv-info*))) +;; (let loop ((count 0) +;; (bad-sync-count 0) +;; (start-time (current-milliseconds))) +;; (if (and (not is-main) +;; (common:low-noise-print 60 "servdat-status")) +;; (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*))) +;; +;; (mutex-lock! *heartbeat-mutex*) +;; ;; set up the database handle +;; (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate +;; (let ((watchdog (bdat-watchdog *bdat*))) +;; (debug:print 0 *default-log-port* "SERVER: dbprep") +;; (db:setup dbname) ;; sets *dbstruct-db* as side effect +;; (servdat-status-set! *db-serv-info* 'db-opened) +;; ;; IFF I'm not main, call into main and register self +;; (if (not is-main) +;; (let ((res (rmt:register-server sinfo +;; *toppath* iface port +;; server-key dbname))) +;; (if res ;; we are the server +;; (servdat-status-set! *db-serv-info* 'have-interface-and-db) +;; ;; now check that the db locker is alive, clear it out if not +;; (let* ((serv-info (rmt:server-info *toppath* dbname))) +;; (match serv-info +;; ((host port servkey pid ipaddr apath dbpath) +;; (if (not (server-ready? uconn (conc host":"port) servkey)) +;; (begin +;; (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") +;; (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) +;; (loop (+ count 1) bad-sync-count start-time)))) +;; (else +;; (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) +;; (exit))))))) +;; (debug:print 0 *default-log-port* +;; "SERVER: running, db "dbname" opened, megatest version: " +;; (common:get-full-version)) +;; ;; start the watchdog +;; +;; ;; is this really needed? +;; +;; #;(if watchdog +;; (if (not (member (thread-state watchdog) +;; '(ready running blocked +;; sleeping dead))) +;; (begin +;; (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")") +;; (thread-start! watchdog)) +;; (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")")) +;; (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it.")) +;; #;(loop (+ count 1) bad-sync-count start-time) +;; )) +;; +;; (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) +;; +;; (mutex-unlock! *heartbeat-mutex*) +;; +;; ;; when things go wrong we don't want to be doing the various +;; ;; queries too often so we strive to run this stuff only every +;; ;; four seconds or so. +;; (let* ((sync-time (- (current-milliseconds) start-time)) +;; (rem-time (quotient (- 4000 sync-time) 1000))) +;; (if (and (<= rem-time 4) +;; (> rem-time 0)) +;; (thread-sleep! rem-time))) +;; +;; ;; Transfer *db-last-access* to last-access to use in checking that we are still alive +;; (set! last-access *db-last-access*) +;; +;; (if (< count 1) ;; 3x3 = 9 secs aprox +;; (loop (+ count 1) bad-sync-count (current-milliseconds))) +;; +;; (if (common:low-noise-print 60 "dbstats") +;; (begin +;; (debug:print 0 *default-log-port* "Server stats:") +;; (db:print-current-query-stats))) +;; (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) +;; (cond +;; ((not *server-run*) +;; (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") +;; (shutdown-server-sequence (get-host-name) port)) +;; ((timed-out?) +;; (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) +;; (shutdown-server-sequence (get-host-name) port)) +;; ((and *server-run* +;; (or (not (timed-out?)) +;; (if is-main ;; do not exit if there are other servers (keep main open until all others gone) +;; (> (rmt:get-count-servers sinfo *toppath*) 1) +;; #f))) +;; (if (common:low-noise-print 120 "server continuing") +;; (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) +;; (loop 0 bad-sync-count (current-milliseconds))) +;; (else +;; (set! *unclean-shutdown* #f) +;; (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) +;; (shutdown-server-sequence (get-host-name) port) +;; #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " +;; (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown +;; (sexpr->string 'quit)))))))))) +;; +;; (define (rmt:get-reasonable-hostname) +;; (let* ((inhost (or (args:get-arg "-server") "-"))) +;; (if (equal? inhost "-") +;; (get-host-name) +;; inhost))) +;; +;; Call this to start the actual server +;; +;; all routes though here end in exit ... +;; +;; This is the point at which servers are started +;; +(define (rmt:server-launch dbname) + (debug:print-info 0 *default-log-port* "Entered rmt:server-launch") + #;(let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (rmt:run (rmt:get-reasonable-hostname))) + "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (if (args:get-arg "-server") + (rmt:keep-running dbname))) + "Keep running"))) + (thread-start! th2) + (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (thread-join! th3)) + #f) +;; +;; ;;====================================================================== +;; ;; S E R V E R - D I R E C T C A L L S +;; ;;====================================================================== +;; +;; (define (rmt:kill-server run-id) +;; (rmt:send-receive 'kill-server #f (list run-id))) +;; +;; (define (rmt:start-server run-id) +;; (rmt:send-receive 'start-server #f (list run-id))) +;; +;; (define (rmt:server-info apath dbname) +;; (rmt:send-receive 'get-server-info #f (list apath dbname))) +;; +;; ;;====================================================================== +;; ;; Nanomsg transport +;; ;;====================================================================== +;; +;; #;(define (is-port-in-use port-num) +;; (let* ((ret #f)) +;; (let-values (((inp oup pid) +;; (process "netstat" (list "-tulpn" )))) +;; (let loop ((inl (read-line inp))) +;; (if (not (eof-object? inl)) +;; (begin +;; (if (string-search (regexp (conc ":" port-num)) inl) +;; (begin +;; ;(print "Output: " inl) +;; (set! ret #t)) +;; (loop (read-line inp))))))) +;; ret)) +;; +;; #;(define (open-nn-connection host-port) +;; (let ((req (make-req-socket)) +;; (uri (conc "tcp://" host-port))) +;; (nng-dial req uri) +;; (socket-set! req 'nng/recvtimeo 2000) +;; req)) +;; +;; #;(define (send-receive-nn req msg) +;; (nng-send req msg) +;; (nng-recv req)) +;; +;; #;(define (close-nn-connection req) +;; (nng-close! req)) +;; +;; ;; ;; open connection to server, send message, close connection +;; ;; ;; +;; ;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds +;; ;; (let ((req (make-req-socket 'req)) +;; ;; (uri (conc "tcp://" host-port)) +;; ;; (res #f) +;; ;; ;; (contacts (alist-ref 'contact attrib)) +;; ;; ;; (mode (alist-ref 'mode attrib)) +;; ;; ) +;; ;; (socket-set! req 'nng/recvtimeo 2000) +;; ;; (handle-exceptions +;; ;; exn +;; ;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) +;; ;; ;; Send notification +;; ;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) +;; ;; #f) +;; ;; (nng-dial req uri) +;; ;; ;; (print "Connected to the server " ) +;; ;; (nng-send req msg) +;; ;; ;; (print "Request Sent") +;; ;; (let* ((th1 (make-thread (lambda () +;; ;; (let ((resp (nng-recv req))) +;; ;; (nng-close! req) +;; ;; (set! res (if (equal? resp "ok") +;; ;; #t +;; ;; #f)))) +;; ;; "recv thread")) +;; ;; (th2 (make-thread (lambda () +;; ;; (thread-sleep! timeout) +;; ;; (thread-terminate! th1)) +;; ;; "timer thread"))) +;; ;; (thread-start! th1) +;; ;; (thread-start! th2) +;; ;; (thread-join! th1) +;; ;; res)))) +;; ;; +;; #;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds +;; (let ((req (make-req-socket)) +;; (uri (conc "tcp://" host-port)) +;; (res #f)) +;; (handle-exceptions +;; exn +;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) +;; ;; Send notification +;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn) +;; #f) +;; (nng-dial req uri) +;; (nng-send req msg) +;; (let* ((th1 (make-thread (lambda () +;; (let ((resp (nng-recv req))) +;; (nng-close! req) +;; ;; (print resp) +;; (set! res resp))) +;; "recv thread")) +;; (th2 (make-thread (lambda () +;; (thread-sleep! timeout) +;; (thread-terminate! th1)) +;; "timer thread"))) +;; (thread-start! th1) +;; (thread-start! th2) +;; (thread-join! th1) +;; res)))) +;; +;; ;;====================================================================== +;; ;; S E R V E R U T I L I T I E S +;; ;;====================================================================== +;; +;; ;; run ping in separate process, safest way in some cases +;; ;; +;; #;(define (server:ping-server ifaceport) +;; (with-input-from-pipe +;; (conc (common:get-megatest-exe) " -ping " ifaceport) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res "NOREPLY")) +;; (if (eof-object? inl) +;; (case (string->symbol res) +;; ((NOREPLY) #f) +;; ((LOGIN_OK) #t) +;; (else #f)) +;; (loop (read-line) inl)))))) +;; +;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; ;; +;; #;(define (server:login toppath) +;; (lambda (toppath) +;; (set! *db-last-access* (current-seconds)) ;; might not be needed. +;; (if (equal? *toppath* toppath) +;; #t +;; #f))) +;; +;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; ;; (define (server:release-sync-lock) +;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; ;; (define (server:have-sync-lock?) +;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; ;; (have-lock? (car have-lock-pair)) +;; ;; (lock-time (cdr have-lock-pair)) +;; ;; (lock-age (- (current-seconds) lock-time))) +;; ;; (cond +;; ;; (have-lock? #t) +;; ;; ((>lock-age +;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; ;; (server:release-sync-lock) +;; ;; (server:have-sync-lock?)) +;; ;; (else #f)))) + +) ADDED ulex-none/ulex.scm Index: ulex-none/ulex.scm ================================================================== --- /dev/null +++ ulex-none/ulex.scm @@ -0,0 +1,569 @@ +;; ulex: Distributed sqlite3 db +;;; +;; Copyright (C) 2018-2021 Matt Welland +;; Redistribution and use in source and binary forms, with or without +;; modification, is permitted. +;; +;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +;; DAMAGE. + +;;====================================================================== +;; ABOUT: +;; See README in the distribution at https://www.kiatoa.com/fossils/ulex +;; NOTES: +;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. +;; +;;====================================================================== + +(module ulex + * + #;( + + ;; NOTE: looking for the handler proc - find the run-listener :) + + run-listener ;; (run-listener handler-proc [port]) => uconn + + ;; NOTE: handler-proc params; + ;; (handler-proc rem-host-port qrykey cmd params) + + send-receive ;; (send-receive uconn host-port cmd data) + + ;; NOTE: cmd can be any plain text symbol except for these; + ;; 'ping 'ack 'goodbye 'response + + set-work-handler ;; (set-work-handler proc) + + wait-and-close ;; (wait-and-close uconn) + + ulex-listener? + + ;; needed to get the interface:port that was automatically found + udat-port + udat-host-port + + ;; for testing only + ;; pp-uconn + + ;; parameters + work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct + return-method ;; parameter; 'mailbox, 'polling, 'direct + ) + +(import scheme + chicken.base + chicken.file + chicken.io + chicken.time + chicken.condition + chicken.string + chicken.sort + chicken.pretty-print + + address-info + mailbox + matchable + ;; queues + regex + regex-case + simple-exceptions + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + system-information + tcp6 + typed-records + ) + +;; ;; udat struct, used by both caller and callee +;; ;; instantiated as uconn by convention +;; ;; +;; (defstruct udat +;; ;; the listener side +;; (port #f) +;; (host-port #f) +;; (socket #f) +;; ;; the peers +;; (peers (make-hash-table)) ;; host:port->peer +;; ;; work handling +;; (work-queue (make-mailbox)) +;; (work-proc #f) ;; set by user +;; (cnum 0) ;; cookie number +;; (mboxes (make-hash-table)) ;; for the replies +;; (avail-cmboxes '()) ;; list of ( . ) for re-use +;; ;; threads +;; (numthreads 10) +;; (cmd-thread #f) +;; (work-queue-thread #f) +;; (num-threads-running 0) +;; ) +;; +;; ;; Parameters +;; +;; ;; work-method: +;; (define work-method (make-parameter 'mailbox)) +;; ;; mailbox - all rdat goes through mailbox +;; ;; threads - all rdat immediately executed in new thread +;; ;; direct - no queuing +;; ;; +;; +;; ;; return-method, return the result to waiting send-receive: +;; (define return-method (make-parameter 'mailbox)) +;; ;; mailbox - create a mailbox and use it for passing returning results to send-receive +;; ;; polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result +;; ;; direct - no queuing, result is passed back in single tcp connection +;; ;; +;; +;; ;; ;; struct for keeping track of others we are talking to +;; ;; ;; +;; ;; (defstruct pdat +;; ;; (host-port #f) +;; ;; (conns '()) ;; list of pcon structs, pop one off when calling the peer +;; ;; ) +;; ;; +;; ;; ;; struct for peer connections, keep track of expiration etc. +;; ;; ;; +;; ;; (defstruct pcon +;; ;; (inp #f) +;; ;; (oup #f) +;; ;; (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59) +;; ;; (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes +;; ;; ) +;; +;; ;;====================================================================== +;; ;; listener +;; ;;====================================================================== +;; +;; ;; is uconn a ulex connector (listener) +;; ;; +;; (define (ulex-listener? uconn) +;; (udat? uconn)) +;; +;; ;; create a tcp listener and return a populated udat struct with +;; ;; my port, address, hostname, pid etc. +;; ;; return #f if fail to find a port to allocate. +;; ;; +;; ;; if udata-in is #f create the record +;; ;; if there is already a serv-listener return the udata +;; ;; +;; (define (setup-listener uconn #!optional (port 4242)) +;; (handle-exceptions +;; exn +;; (if (< port 65535) +;; (setup-listener uconn (+ port 1)) +;; #f) +;; (connect-listener uconn port))) +;; +;; (define (connect-listener uconn port) +;; ;; (tcp-listener-socket LISTENER)(socket-name so) +;; ;; sockaddr-address, sockaddr-port, sockaddr->string +;; (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) +;; (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) +;; (udat-port-set! uconn port) +;; (udat-host-port-set! uconn (conc addr":"port)) +;; (udat-socket-set! uconn tlsn) +;; uconn)) +;; +;; ;; run-listener does all the work of starting a listener in a thread +;; ;; it then returns control +;; ;; +;; (define (run-listener handler-proc #!optional (port-suggestion 4242)) +;; (let* ((uconn (make-udat))) +;; (udat-work-proc-set! uconn handler-proc) +;; (if (setup-listener uconn port-suggestion) +;; (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop")) +;; (th2 (make-thread (lambda () +;; (case (work-method) +;; ((mailbox limited) +;; (process-work-queue uconn)))) +;; "Ulex work queue processor"))) +;; ;; (tcp-buffer-size 2048) +;; (thread-start! th1) +;; (thread-start! th2) +;; (udat-cmd-thread-set! uconn th1) +;; (udat-work-queue-thread-set! uconn th2) +;; (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".") +;; uconn) +;; (assert #f "ERROR: run-listener called without proper setup.")))) +;; +;; (define (wait-and-close uconn) +;; (thread-join! (udat-cmd-thread uconn)) +;; (tcp-close (udat-socket uconn))) +;; +;; ;;====================================================================== +;; ;; peers and connections +;; ;;====================================================================== +;; +;; (define *send-mutex* (make-mutex)) +;; +;; ;; send structured data to recipient +;; ;; +;; ;; NOTE: qrykey is what was called the "cookie" previously +;; ;; +;; ;; retval tells send to expect and wait for return data (one line) and return it or time out +;; ;; this is for ping where we don't want to necessarily have set up our own server yet. +;; ;; +;; ;; NOTE: see below for beginnings of code to allow re-use of tcp connections +;; ;; - I believe (without substantial evidence) that re-using connections will +;; ;; be beneficial ... +;; ;; +;; (define (send udata host-port qrykey cmd params) +;; (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this +;; (isme #f #;(equal? host-port my-host-port)) ;; calling myself? +;; ;; dat is a self-contained work block that can be sent or handled locally +;; (dat (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds))))) +;; (cond +;; (isme (ulex-handler udata dat)) ;; no transmission needed +;; (else +;; (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? +;; exn +;; (message exn) +;; (begin +;; ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; (let-values (((inp oup)(tcp-connect host-port))) +;; (let ((res (if (and inp oup) +;; (begin +;; (serialize dat oup) +;; (close-output-port oup) +;; (deserialize inp) +;; ) +;; (begin +;; (print "ERROR: send called but no receiver has been setup. Please call setup first!") +;; #f)))) +;; (close-input-port inp) +;; ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; res)))))))) ;; res will always be 'ack unless return-method is direct +;; +;; (define (send-via-polling uconn host-port cmd data) +;; (let* ((qrykey (make-cookie uconn)) +;; (sres (send uconn host-port qrykey cmd data))) +;; (case sres +;; ((ack) +;; (let loop ((start-time (current-milliseconds))) +;; (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout +;; (begin +;; (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data) +;; #f) +;; (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash +;; (if result ;; result is '(status . result-data) or #f for nothing yet +;; (begin +;; (hash-table-delete! (udat-mboxes uconn) qrykey) +;; (cdr result)) +;; (begin +;; (thread-sleep! 0.01) +;; (loop start-time))))))) +;; (else +;; (print "ULEX ERROR: Communication failed? sres="sres) +;; #f)))) +;; +;; (define (send-via-mailbox uconn host-port cmd data) +;; (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? +;; (qrykey (car cmbox)) +;; (mbox (cdr cmbox)) +;; (mbox-time (current-milliseconds)) +;; (sres (send uconn host-port qrykey cmd data))) ;; short res +;; (if (eq? sres 'ack) +;; (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread))) +;; #f +;; 120)) ;; timeout) +;; (mbox-timeout-result 'MBOX_TIMEOUT) +;; (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) +;; (mbox-receive-time (current-milliseconds))) +;; ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? +;; (hash-table-delete! (udat-mboxes uconn) qrykey) +;; (if (eq? res 'MBOX_TIMEOUT) +;; (begin +;; (print "WARNING: mbox timed out for query "cmd", with data "data +;; ", waiting for response from "host-port".") +;; +;; ;; here it might make sense to clean up connection records and force clean start? +;; ;; NO. The progam using ulex needs to do the reset. Right thing here is exception +;; +;; #f) ;; convert to raising exception? +;; res)) +;; (begin +;; (print "ERROR: Communication failed? Got "sres) +;; #f)))) +;; +;; ;; send a request to the given host-port and register a mailbox in udata +;; ;; wait for the mailbox data and return it +;; ;; +;; (define (send-receive uconn host-port cmd data) +;; (let* ((start-time (current-milliseconds)) +;; (result (cond +;; ((member cmd '(ping goodbye)) ;; these are immediate +;; (send uconn host-port 'ping cmd data)) +;; ((eq? (work-method) 'direct) +;; ;; the result from send will be the actual result, not an 'ack +;; (send uconn host-port 'direct cmd data)) +;; (else +;; (case (return-method) +;; ((polling) +;; (send-via-polling uconn host-port cmd data)) +;; ((mailbox) +;; (send-via-mailbox uconn host-port cmd data)) +;; (else +;; (print "ULEX ERROR: unrecognised return-method "(return-method)".") +;; #f))))) +;; (duration (- (current-milliseconds) start-time))) +;; ;; this is ONLY for development and debugging. It will be removed once Ulex is stable. +;; (if (< 5000 duration) +;; (print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000))) +;; " seconds; "cmd", host-port="host-port", data="data)) +;; result)) +;; +;; +;; ;;====================================================================== +;; ;; responder side +;; ;;====================================================================== +;; +;; ;; take a request, rdat, and if not immediate put it in the work queue +;; ;; +;; ;; Reserved cmds; ack ping goodbye response +;; ;; +;; (define (ulex-handler uconn rdat) +;; (assert (list? rdat) "FATAL: ulex-handler give rdat as not list") +;; (match rdat ;; (string-split controldat) +;; ((rem-host-port qrykey cmd params);; timedata) +;; ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params) +;; (case cmd +;; ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack) +;; ((ping) +;; ;; (print "Got Ping!") +;; ;; (add-to-work-queue uconn rdat) +;; 'ack) +;; ((goodbye) +;; ;; just clear out references to the caller. NOT COMPLETE +;; (add-to-work-queue uconn rdat) +;; 'ack) +;; ((response) ;; this is a result from remote processing, send it as mail ... +;; (case (return-method) +;; ((polling) +;; (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params)) +;; 'ack) +;; ((mailbox) +;; (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) +;; (if mbox +;; (begin +;; (mailbox-send! mbox params) ;; params here is our result +;; 'ack) +;; (begin +;; (print "ERROR: received result but no associated mbox for cookie "qrykey) +;; 'no-mbox-found)))) +;; (else (print "ULEX ERROR: unrecognised return-method "(return-method)) +;; 'bad-return-method))) +;; (else ;; generic request - hand it to the work queue +;; (add-to-work-queue uconn rdat) +;; 'ack))) +;; (else +;; (print "ULEX ERROR: bad rdat "rdat) +;; 'bad-rdat))) +;; +;; ;; given an already set up uconn start the cmd-loop +;; ;; +;; (define (ulex-cmd-loop uconn) +;; (let* ((serv-listener (udat-socket uconn)) +;; (listener (lambda () +;; (let loop ((state 'start)) +;; (let-values (((inp oup)(tcp-accept serv-listener))) +;; ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params) +;; (resp (ulex-handler uconn rdat))) +;; (serialize resp oup) +;; (close-input-port inp) +;; (close-output-port oup) +;; ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; ) +;; (loop state)))))) +;; ;; start N of them +;; (let loop ((thnum 0) +;; (threads '())) +;; (if (< thnum 100) +;; (let* ((th (make-thread listener (conc "listener" thnum)))) +;; (thread-start! th) +;; (loop (+ thnum 1) +;; (cons th threads))) +;; (map thread-join! threads))))) +;; +;; ;; add a proc to the cmd list, these are done symetrically (i.e. in all instances) +;; ;; so that the proc can be dereferenced remotely +;; ;; +;; (define (set-work-handler uconn proc) +;; (udat-work-proc-set! uconn proc)) +;; +;; ;;====================================================================== +;; ;; work queues - this is all happening on the listener side +;; ;;====================================================================== +;; +;; ;; rdat is (rem-host-port qrykey cmd params) +;; +;; (define (add-to-work-queue uconn rdat) +;; #;(queue-add! (udat-work-queue uconn) rdat) +;; (case (work-method) +;; ((threads) +;; (thread-start! (make-thread (lambda () +;; (do-work uconn rdat)) +;; "worker thread"))) +;; ((mailbox) +;; (mailbox-send! (udat-work-queue uconn) rdat)) +;; ((direct) +;; (do-work uconn rdat)) +;; (else +;; (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.") +;; (mailbox-send! (udat-work-queue uconn) rdat)))) +;; +;; ;; move the logic to return the result somewhere else? +;; ;; +;; (define (do-work uconn rdat) +;; (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change +;; ;; put this following into a do-work procedure +;; (match rdat +;; ((rem-host-port qrykey cmd params) +;; (let* ((start-time (current-milliseconds)) +;; (result (proc rem-host-port qrykey cmd params)) +;; (end-time (current-milliseconds)) +;; (run-time (- end-time start-time))) +;; (case (work-method) +;; ((direct) result) +;; (else +;; (print "ULEX: work "cmd", "params" done in "run-time" ms") +;; ;; send 'response as cmd and result as params +;; (send uconn rem-host-port qrykey 'response result) ;; could check for ack +;; (print "ULEX: response sent back to "rem-host-port" in "(- (current-milliseconds) end-time)))))) +;; (MBOX_TIMEOUT 'do-work-timeout) +;; (else +;; (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params"))))) +;; +;; ;; NEW APPROACH: +;; ;; +;; (define (process-work-queue uconn) +;; (let ((wqueue (udat-work-queue uconn)) +;; (proc (udat-work-proc uconn)) +;; (numthr (udat-numthreads uconn))) +;; (let loop ((thnum 1) +;; (threads '())) +;; (let ((thlst (cons (make-thread (lambda () +;; (let work-loop () +;; (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT))) +;; (do-work uconn rdat)) +;; (work-loop))) +;; (conc "work thread " thnum)) +;; threads))) +;; (if (< thnum numthr) +;; (loop (+ thnum 1) +;; thlst) +;; (begin +;; (print "ULEX: Starting "(length thlst)" worker threads.") +;; (map thread-start! thlst) +;; (print "ULEX: Threads started. Joining all.") +;; (map thread-join! thlst))))))) +;; +;; ;; below was to enable re-use of connections. This seems non-trivial so for +;; ;; now lets open on each call +;; ;; +;; ;; ;; given host-port get or create peer struct +;; ;; ;; +;; ;; (define (udat-get-peer uconn host-port) +;; ;; (or (hash-table-ref/default (udat-peers uconn) host-port #f) +;; ;; ;; no peer, so create pdat and init it +;; ;; +;; ;; ;; NEED stack of connections, pop and use; inp, oup, +;; ;; ;; creation_time (remove and create new if over 24hrs old +;; ;; ;; +;; ;; (let ((pdat (make-pdat host-port: host-port))) +;; ;; (hash-table-set! (udat-peers uconn) host-port pdat) +;; ;; pdat))) +;; ;; +;; ;; ;; is pcon alive +;; ;; +;; ;; ;; given host-port and pdat get a pcon +;; ;; ;; +;; ;; (define (pdat-get-pcon pdat host-port) +;; ;; (let loop ((conns (pdat-conns pdat))) +;; ;; (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later +;; ;; (init-pcon (make-pcon)) +;; ;; (let* ((conn (pop conns))) +;; ;; +;; ;; ;; given host-port get a pcon struct +;; ;; ;; +;; ;; (define (udat-get-pcon +;; +;; ;;====================================================================== +;; ;; misc utils +;; ;;====================================================================== +;; +;; (define (make-cookie uconn) +;; (let ((newcnum (+ (udat-cnum uconn) 1))) +;; (udat-cnum-set! uconn newcnum) +;; (conc (udat-host-port uconn) ":" +;; newcnum))) +;; +;; ;; cookie/mboxes +;; +;; ;; we store each mbox with a cookie ( . ) +;; ;; +;; (define (get-cmbox uconn) +;; (if (null? (udat-avail-cmboxes uconn)) +;; (let ((cookie (make-cookie uconn)) +;; (mbox (make-mailbox))) +;; (hash-table-set! (udat-mboxes uconn) cookie mbox) +;; `(,cookie . ,mbox)) +;; (let ((cmbox (car (udat-avail-cmboxes uconn)))) +;; (udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn))) +;; cmbox))) +;; +;; (define (put-cmbox uconn cmbox) +;; (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn)))) +;; +;; (define (pp-uconn uconn) +;; (pp (udat->alist uconn))) +;; +;; +;; ;;====================================================================== +;; ;; network utilities +;; ;;====================================================================== +;; +;; ;; NOTE: Look at address-info egg as alternative to some of this +;; +;; (define (rate-ip ipaddr) +;; (regex-case ipaddr +;; ( "^127\\..*" _ 0 ) +;; ( "^(10\\.0|192\\.168)\\..*" _ 1 ) +;; ( else 2 ) )) +;; +;; ;; Change this to bias for addresses with a reasonable broadcast value? +;; ;; +;; (define (ip-pref-less? a b) +;; (> (rate-ip a) (rate-ip b))) +;; +;; (define (get-my-best-address) +;; (let ((all-my-addresses (get-all-ips))) +;; (cond +;; ((null? all-my-addresses) +;; (get-host-name)) ;; no interfaces? +;; ((eq? (length all-my-addresses) 1) +;; (car all-my-addresses)) ;; only one to choose from, just go with it +;; (else +;; (car (sort all-my-addresses ip-pref-less?)))))) +;; +;; (define (get-all-ips-sorted) +;; (sort (get-all-ips) ip-pref-less?)) +;; +;; (define (get-all-ips) +;; (map address-info-host +;; (filter (lambda (x) +;; (equal? (address-info-type x) "tcp")) +;; (address-infos (get-host-name))))) +;; +) ADDED ulex-simple/dbmgr.scm Index: ulex-simple/dbmgr.scm ================================================================== --- /dev/null +++ ulex-simple/dbmgr.scm @@ -0,0 +1,1006 @@ +;;====================================================================== +;; Copyright 2022, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbmgrmod)) + +(declare (uses ulex)) +(declare (uses apimod)) +(declare (uses pkts)) +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses portloggermod)) +(declare (uses debugprint)) + +(module dbmgrmod + * + +(import scheme + chicken.base + chicken.condition + chicken.file + chicken.format + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.tcp + chicken.time + + (prefix sqlite3 sqlite3:) + matchable + md5 + message-digest + regex + s11n + srfi-1 + srfi-18 + srfi-69 + system-information + typed-records + + pkts + ulex + + commonmod + apimod + dbmod + debugprint + (prefix mtargs args:) + portloggermod + ) + +;; Configurations for server +(tcp-buffer-size 2048) +;; (max-connections 2048) + +;; info about me as a listener and my connections to db servers +;; stored (for now) in *db-serv-info* +;; +(defstruct servdat + (host #f) + (port #f) + (uuid #f) + (dbfile #f) + (uconn #f) ;; this is the listener for this process + (mode #f) + (status 'starting) + (trynum 0) ;; count the number of ports we've tried + (conns (make-hash-table)) ;; apath/dbname => conndat + ) + +(define *db-serv-info* #f) + +(serializing-method 'complex) ;; write, s11n, complex + +(define (servdat->url sdat) + (conc (servdat-host sdat)":"(servdat-port sdat))) + +;; db servers contact info +;; +(defstruct conndat + (apath #f) + (dbname #f) + (fullname #f) + (hostport #f) + (ipaddr #f) + (port #f) + (srvpkt #f) + (srvkey #f) + (lastmsg 0) + (expires 0)) + +(define *srvpktspec* + `((server (host . h) + (port . p) + (servkey . k) + (pid . i) + (ipaddr . a) + (dbpath . d)))) + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +;; set up the api proc, seems like there should be a better place for this? +;; +;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE +;; +;; (define api-proc (make-parameter conc)) +;; (api-proc api:execute-requests) + +;; do we have a connection to apath dbname and +;; is it not expired? then return it +;; +;; else setup a connection +;; +;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception +;; +(define (rmt:get-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-ref/default (servdat-conns remdat) fullname #f))) + +(define (rmt:drop-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-delete! (servdat-conns remdat) fullname))) + +(define (rmt:find-main-server uconn apath dbname) + (let* ((pktsdir (get-pkts-dir apath)) + (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) + (viable-srvs (get-viable-servers all-srvpkts dbname))) + (get-the-server uconn apath viable-srvs))) + + +(define *connstart-mutex* (make-mutex)) +(define *last-main-start* 0) + +;; looks for a connection to main, returns if have and not exired +;; creates new otherwise +;; +;; connections for other servers happens by requesting from main +;; +;; TODO: This is unnecessarily re-creating the record in the hash table +;; +(define (rmt:open-main-connection remdat apath) + (let* ((fullpath (db:dbname->path apath ".db/main.db")) + (conns (servdat-conns remdat)) + (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this + (myconn (servdat-uconn remdat))) + (cond + ((not myconn) + (servdat-uconn-set! remdat (make-udat)) + (rmt:open-main-connection remdat apath)) + ((and conn ;; conn is NOT a socket, just saying ... + (< (current-seconds) (conndat-expires conn))) + #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died + ((and conn + (>= (current-seconds)(conndat-expires conn))) + (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") + (rmt:drop-conn remdat apath ".db/main.db") ;; + (rmt:open-main-connection remdat apath)) + (else + ;; Below we will find or create and connect to main + (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch") + (let* ((dbname (db:run-id->dbname #f)) + (the-srv (rmt:find-main-server myconn apath dbname)) + (start-main-srv (lambda () ;; call IF there is no the-srv found + (mutex-lock! *connstart-mutex*) + (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server + (begin + (api:run-server-process apath dbname) + (set! *last-main-start* (current-seconds)) + (thread-sleep! 1)) + (thread-sleep! 0.25)) + (mutex-unlock! *connstart-mutex*) + (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries + ))) + (if (not the-srv) ;; have server, try connecting to it + (start-main-srv) + (let* ((srv-addr (server-address the-srv)) ;; need serv + (ipaddr (alist-ref 'ipaddr the-srv)) + (port (alist-ref 'port the-srv)) + (srvkey (alist-ref 'servkey the-srv)) + (fullpath (db:dbname->path apath dbname)) + + (new-the-srv (make-conndat + apath: apath + dbname: dbname + fullname: fullpath + hostport: srv-addr + ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvpkt: the-srv + srvkey: srvkey ;; generated by rmt:get-signature on the server side + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2) ;; this needs to be gathered during the ping + ))) + (hash-table-set! conns fullpath new-the-srv))) + #t))))) + +;; NB// sinfo is a servdat struct +;; +(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5)) + (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") + (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable + (fullname (db:dbname->path apath dbname)) + (conns (servdat-conns sinfo)) + (mconn (rmt:get-conn sinfo apath ".db/main.db")) + (dconn (rmt:get-conn sinfo apath dbname))) + #;(if (and mconn + (not (debug:print-logger))) + (begin + (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") + (debug:print-logger rmt:log-to-main))) + (cond + ((and mconn + dconn + (< (current-seconds)(conndat-expires dconn))) + #t) ;; good to go + ((not mconn) ;; no channel open to main? open it... + (rmt:open-main-connection sinfo apath) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + ((not dconn) ;; no channel open to dbname? + (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname)))) + (case res + ((server-started) + (if (> num-tries 0) + (begin + (thread-sleep! 2) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + (begin + (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) + (exit 1)))) + (else + (if (list? res) ;; server has been registered and the info was returned. pass it on. + (begin ;; ("192.168.0.9" 53817 + ;; "5e34239f48e8973b3813221e54701a01" "24310" + ;; "192.168.0.9" + ;; "/home/matt/data/megatest/tests/simplerun" + ;; ".db/1.db") + (match + res + ((host port servkey pid ipaddr apath dbname) + (debug:print-info 0 *default-log-port* "got "res) + (hash-table-set! conns + fullname + (make-conndat + apath: apath + dbname: dbname + hostport: (conc host":"port) + ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvkey: servkey + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2)))) + (else + (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) + res) + (begin + (debug:print-info 0 *default-log-port* "Unexpected result: " res) + res))))))) + #t)) + +;;====================================================================== + +;; FOR DEBUGGING SET TO #t +;; (define *localmode* #t) +(define *localmode* #f) +(define *dbstruct* (make-dbr:dbstruct)) + +;; Defaults to current area +;; +(define (rmt:send-receive-attempted-consolidation cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (sinfo *db-serv-info*) + (dbname (db:run-id->dbname rid))) + (if (not *db-serv-info*) + (begin + (set! *db-serv-info* (make-servdat)) + (set! sinfo *db-serv-info*))) + (rmt:open-main-connection sinfo apath) + (if rid (rmt:general-open-connection sinfo apath dbname)) + ;; (if (not (member cmd '(log-to-main))) + ;; (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) + (let* ((cdat (rmt:get-conn sinfo apath dbname))) + (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn (conndat-hostport cdat) cmd params))) + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -2)) ;; two second margin for network time misalignments etc. + res)))) + +; Defaults to current area +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (sinfo *db-serv-info*) + (dbname (db:run-id->dbname rid))) + (if (not *db-serv-info*) ;; confirm this is really needed + (begin + (set! *db-serv-info* (make-servdat)) + (set! sinfo *db-serv-info*))) + (rmt:open-main-connection sinfo apath) + (if rid (rmt:general-open-connection sinfo apath dbname)) + #;(if (not (member cmd '(log-to-main))) + (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) + (rmt:send-receive-real sinfo apath dbname cmd params))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future +;; +(define (rmt:send-receive-real sinfo apath dbname cmd params) + (let* ((cdat (rmt:get-conn sinfo apath dbname))) + (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn (conndat-hostport cdat) cmd params))) + ;; since we accessed the server we can bump the expires time up + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -2)) ;; two second margin for network time misalignments etc. + res))) + +; +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future. +;; +;; Purpose - call the main.db server and request a server be started +;; for the given area path and dbname +;; + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================") + (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +;; host and port are used to ensure we are remove proper records +(define (rmt:server-shutdown host port) + (let ((dbfile (servdat-dbfile *db-serv-info*))) + (debug:print-info 0 *default-log-port* "dbfile is "dbfile) + (if dbfile + (let* ((am-server (args:get-arg "-server")) + (dbfile (args:get-arg "-db")) + (apath *toppath*) + #;(sinfo *remotedat*)) ;; foundation for future fix + (if *dbstruct-db* + (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) + (db (dbr:dbdat-db dbdat)) + (inmem (dbr:dbdat-db dbdat)) ;; WRONG + ) + ;; do a final sync here + (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) + (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) + ;; let's finalize here + (debug:print-info 0 *default-log-port* "Finalizing db and inmem") + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) + (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) + (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") + (if (string-match ".*/main.db$" dbfile) + (let ((pkt-file (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) + (delete-file* pkt-file) + (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) + (db:with-lock-db + (servdat-dbfile *db-serv-info*) + (lambda (dbh dbfile) + (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove + (let* ((sdat *db-serv-info*) ;; we have a run-id server + (host (servdat-host sdat)) + (port (servdat-port sdat)) + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) + (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) + ))))))) + + +(define (common:run-sync?) + ;; (and (common:on-homehost?) + (args:get-arg "-server")) + +(define *rmt:run-mutex* (make-mutex)) +(define *rmt:run-flag* #f) + +(define (listener-running?) + (and *db-serv-info* + (servdat-uconn *db-serv-info*))) + +;; Main entry point to start a server. was start-server +(define (rmt:run hostn) + (mutex-lock! *rmt:run-mutex*) + (if *rmt:run-flag* + (begin + (debug:print-warn 0 *default-log-port* "rmt:run already running.") + (mutex-unlock! *rmt:run-mutex*)) + (begin + (set! *rmt:run-flag* #t) + (mutex-unlock! *rmt:run-mutex*) + ;; ;; Configurations for server + ;; (tcp-buffer-size 2048) + ;; (max-connections 2048) + (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") + (if (listener-running?) + (let* ((uconn (servdat-uconn *db-serv-info*))) + (wait-and-close uconn)) + (let* ((port (portlogger:open-run-close portlogger:find-port)) + (handler-proc (lambda (rem-host-port qrykey cmd params) ;; + (set! *db-last-access* (current-seconds)) + (assert (list? params) "FATAL: handler called with non-list params") + (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params) + (debug:print 0 *default-log-port* "handler call: "cmd", params="params) + (api:execute-requests *dbstruct-db* cmd params)))) + ;; (api:process-request *dbstuct-db* + (if (not *db-serv-info*) + (set! *db-serv-info* (make-servdat host: hostn port: port))) + (let* ((uconn (run-listener handler-proc port)) + (rport (udat-port uconn))) ;; the real port + (servdat-host-set! *db-serv-info* hostn) + (servdat-port-set! *db-serv-info* rport) + (servdat-uconn-set! *db-serv-info* uconn) + (wait-and-close uconn) + (db:print-current-query-stats) + ))) + (let* ((host (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (mode (or (servdat-mode *db-serv-info*) + "non-db"))) + ;; server exit stuff here + ;; (rmt:server-shutdown host port) - always do in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit + (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting") + )))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + + +;;====================================================================== +;; NEW SERVER METHOD +;;====================================================================== + +;; only use for main.db - need to re-write some of this :( +;; +(define (get-lock-db sdat dbfile host port) + (assert host "FATAL: get-lock-db called with host not set.") + (assert port "FATAL: get-lock-db called with port not set.") + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile host port)) + (uconn (servdat-uconn sdat))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? uconn (conc owner_host":"owner_port) "abc") + #f ;; locked by someone else + (begin ;; locked by someone dead and gone + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) + (sqlite3:finalize! dbh) + res)) + + +(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) + (let* ((pkt-dat `((host . ,host) + (port . ,port) + (servkey . ,servkey) + (pid . ,(current-process-id)) + (ipaddr . ,ipaddr) + (dbpath . ,dbpath))) + (uuid (write-alist->pkt + pkts-dir + pkt-dat + pktspec: pkt-spec + ptype: 'server))) + (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) + uuid)) + +(define (get-pkts-dir #!optional (apath #f)) + (let* ((effective-toppath (or *toppath* apath))) + (assert effective-toppath + "ERROR: get-pkts-dir called without *toppath* set. Exiting.") + (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) + (if (file-exists? pdir) + pdir + (begin + (handle-exceptions ;; this exception handler should NOT be needed but ... + exn + pdir + (create-directory pdir #t)) + pdir))))) + +;; given a pkts dir read +;; +(define (get-all-server-pkts pktsdir-in pktspec) + (let* ((pktsdir (if (file-exists? pktsdir-in) + pktsdir-in + (begin + (create-directory pktsdir-in #t) + pktsdir-in))) + (all-pkt-files (glob (conc pktsdir "/*.pkt")))) + (map (lambda (pkt-file) + (read-pkt->alist pkt-file pktspec: pktspec)) + all-pkt-files))) + +(define (server-address srv-pkt) + (conc (alist-ref 'host srv-pkt) ":" + (alist-ref 'port srv-pkt))) + +(define (server-ready? uconn host-port key) ;; server-address is host:port + (let* ((params `((cmd . ping)(key . ,key))) + (data `((cmd . ping) + (key . ,key) + (params . ,params))) ;; I don't get it. + (res (send-receive uconn host-port 'ping data))) + (if (eq? res 'ping-ack) ;; yep, likely it is who we want on the other end + res + #f))) +;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f)))) + +; from the pkts return servers associated with dbpath +;; NOTE: Only one can be alive - have to check on each +;; in the list of pkts returned +;; +(define (get-viable-servers serv-pkts dbpath) + (let loop ((tail serv-pkts) + (res '())) + (if (null? tail) + res ;; NOTE: sort by age so oldest is considered first + (let* ((spkt (car tail))) + (loop (cdr tail) + (if (equal? dbpath (alist-ref 'dbpath spkt)) + (cons spkt res) + res)))))) + +(define (remove-pkts-if-not-alive uconn serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (host-port (conc host":"port)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (server-ready? uconn host-port key))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) + +;; from viable servers get one that is alive and ready +;; +(define (get-the-server uconn apath serv-pkts) + (let loop ((tail serv-pkts)) + (if (null? tail) + #f + (let* ((spkt (car tail)) + (host (alist-ref 'ipaddr spkt)) + (port (alist-ref 'port spkt)) + (host-port (conc host":"port)) + (dbpth (alist-ref 'dbpath spkt)) + (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt)) + (addr (server-address spkt))) + (if (server-ready? uconn host-port srvkey) + spkt + (loop (cdr tail))))))) + +;; am I the "first" in line server? I.e. my D card is smallest +;; use Z card as tie breaker +;; +(define (get-best-candidate serv-pkts dbpath) + (if (null? serv-pkts) + #f + (let loop ((tail serv-pkts) + (best (car serv-pkts))) + (if (null? tail) + best + (let* ((candidate (car tail)) + (candidate-bd (string->number (alist-ref 'D candidate))) + (best-bd (string->number (alist-ref 'D best))) + ;; bigger number is younger + (candidate-z (alist-ref 'Z candidate)) + (best-z (alist-ref 'Z best)) + (new-best (cond + ((> best-bd candidate-bd) ;; best is younger than candidate + candidate) + ((< best-bd candidate-bd) ;; candidate is younger than best + best) + (else + (if (string>=? best-z candidate-z) + best + candidate))))) ;; use Z card as tie breaker + (if (null? tail) + new-best + (loop (cdr tail) new-best))))))) + +;;====================================================================== +;; END NEW SERVER METHOD +;;====================================================================== + +;; sdat must be defined and the host and port set and the same as previous +;; +(define (host-port-is-stable? sdat old-host old-port) + (and sdat + (let ((new-host (servdat-host sdat)) + (new-port (servdat-port sdat))) + (and new-host + new-port + (equal? new-host old-host) + (equal? new-port old-port))))) + +;; if .db/main.db check the pkts +;; +(define (rmt:wait-for-server pkts-dir db-file server-key) + (let* ((sdat *db-serv-info*)) + (let loop ((start-time (current-milliseconds)) + (changed #t) + (last-sdat "not this") + (last-host #f) + (last-port #f)) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *db-serv-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (>= (- (current-milliseconds) start-time) 100)) + (let* ((uconn (servdat-uconn sdat))) + (servdat-status-set! sdat 'iface-stable) + (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server") + ;; create a server pkt in *toppath*/.meta/srvpkts + + ;; TODO: + ;; 1. change sdat to stuct + ;; 2. add uuid to struct + ;; 3. update uuid in sdat here + ;; + (servdat-uuid-set! sdat + (register-server + pkts-dir *srvpktspec* + (get-host-name) + (servdat-port sdat) server-key + (servdat-host sdat) db-file)) + ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key + ;; now read pkts and see if we are a contender + (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) + (viables (get-viable-servers all-pkts db-file)) + (alive (remove-pkts-if-not-alive uconn viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) + ;; am I the best-srv, compare server-keys to know + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (begin + (debug:print-info 0 *default-log-port* "I'm the server!") + (servdat-dbfile-set! sdat db-file) + (servdat-status-set! sdat 'db-locked)) + (begin + (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + (begin + (debug:print-info 0 *default-log-port* + "Keys do not match "best-srv-key", "server-key", exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + sdat)) + (begin ;; sdat not yet contains server info + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (thread-sleep! 0.1) + (if (> (- (current-milliseconds) start-time) 120000) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (exit)) + (loop start-time + (not (host-port-is-stable? sdat last-host last-port)) + sdat + (servdat-host sdat) + (servdat-port sdat))))))))) + +(define (rmt:register-server sinfo apath iface port server-key dbname) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'register-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:get-count-servers sinfo apath) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) + +(define (rmt:deregister-server db-serv-info apath iface port server-key dbname) + (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db + (rmt:send-receive db-serv-info apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'deregister-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) + ;; wait until *db-serv-info* stops changing + (let* ((stime (current-seconds))) + (let loop ((last-host #f) + (last-port #f) + (tries 0)) + (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*))) + (curr-port (and *db-serv-info* (servdat-port *db-serv-info*)))) + ;; first we verify port and interface, update *db-serv-info* in need be. + (cond + ((> tries num-tries-allowed) + (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") + (exit 1)) + ((not *db-serv-info*) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not last-host)(not last-port)) + (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not (equal? last-host curr-host)) + (not (equal? last-port curr-port))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed + (thread-sleep! 0.5) + (loop curr-host curr-port (+ tries 1))) + (else + (rmt:get-signature) ;; sets *my-signature* as side effect + (servdat-status-set! *db-serv-info* 'interface-stable) + (debug:print 0 *default-log-port* + "SERVER STARTED: " curr-host + ":" curr-port + " AT " (current-seconds) " server signature: " *my-signature* + " with "(servdat-trynum *db-serv-info*)" port changes") + (flush-output *default-log-port*) + #t)))))) + +;; run rmt:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (rmt:keep-running dbname) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + + (let* ((sinfo *db-serv-info*) + (server-start-time (current-seconds)) + (pkts-dir (get-pkts-dir)) + (server-key (rmt:get-signature)) ;; This servers key + (is-main (equal? (args:get-arg "-db") ".db/main.db")) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (host port) + (set! *unclean-shutdown* #f) ;; Should not be needed anymore + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; (rmt:server-shutdown host port) -- called in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) + (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db")) + ;; main and run db servers have both got wait logic (could/should merge it) + (if is-main + (rmt:wait-for-server pkts-dir dbname server-key) + (rmt:wait-for-stable-interface)) + ;; this is our forever loop + (let* ((iface (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (uconn (servdat-uconn *db-serv-info*))) + (let loop ((count 0) + (bad-sync-count 0) + (start-time (current-milliseconds))) + (if (and (not is-main) + (common:low-noise-print 60 "servdat-status")) + (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*))) + + (mutex-lock! *heartbeat-mutex*) + ;; set up the database handle + (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate + (let ((watchdog (bdat-watchdog *bdat*))) + (debug:print 0 *default-log-port* "SERVER: dbprep") + (db:setup dbname) ;; sets *dbstruct-db* as side effect + (servdat-status-set! *db-serv-info* 'db-opened) + ;; IFF I'm not main, call into main and register self + (if (not is-main) + (let ((res (rmt:register-server sinfo + *toppath* iface port + server-key dbname))) + (if res ;; we are the server + (servdat-status-set! *db-serv-info* 'have-interface-and-db) + ;; now check that the db locker is alive, clear it out if not + (let* ((serv-info (rmt:server-info *toppath* dbname))) + (match serv-info + ((host port servkey pid ipaddr apath dbpath) + (if (not (server-ready? uconn (conc host":"port) servkey)) + (begin + (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") + (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) + (loop (+ count 1) bad-sync-count start-time)))) + (else + (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) + (exit))))))) + (debug:print 0 *default-log-port* + "SERVER: running, db "dbname" opened, megatest version: " + (common:get-full-version)) + )) + + (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) + + (mutex-unlock! *heartbeat-mutex*) + + ;; when things go wrong we don't want to be doing the various + ;; queries too often so we strive to run this stuff only every + ;; four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (set! last-access *db-last-access*) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) bad-sync-count (current-milliseconds))) + + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence (get-host-name) port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port)) + ((and *server-run* + (or (not (timed-out?)) + (if is-main ;; do not exit if there are other servers (keep main open until all others gone) + (> (rmt:get-count-servers sinfo *toppath*) 1) + #f))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) + (loop 0 bad-sync-count (current-milliseconds))) + (else + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port) + #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " + (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown + (sexpr->string 'quit)))))))))) + +(define (rmt:get-reasonable-hostname) + (let* ((inhost (or (args:get-arg "-server") "-"))) + (if (equal? inhost "-") + (get-host-name) + inhost))) + +;; Call this to start the actual server +;; +;; all routes though here end in exit ... +;; +;; This is the point at which servers are started +;; +(define (rmt:server-launch dbname) + (debug:print-info 0 *default-log-port* "Entered rmt:server-launch") + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (rmt:run (rmt:get-reasonable-hostname))) + "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (if (args:get-arg "-server") + (rmt:keep-running dbname) + #;(rmt:wait-for-stable-interface))) + "Keep running"))) + (thread-start! th2) + (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (thread-join! th3)) + #f) + +;;====================================================================== +;; S E R V E R - D I R E C T C A L L S +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server #f (list run-id))) + +(define (rmt:start-server run-id) + (rmt:send-receive 'start-server #f (list run-id))) + +(define (rmt:server-info apath dbname) + (rmt:send-receive 'get-server-info #f (list apath dbname))) + + +) ADDED ulex-simple/ulex.scm Index: ulex-simple/ulex.scm ================================================================== --- /dev/null +++ ulex-simple/ulex.scm @@ -0,0 +1,355 @@ +;; ulex: Distributed sqlite3 db +;;; +;; Copyright (C) 2018-2021 Matt Welland +;; Redistribution and use in source and binary forms, with or without +;; modification, is permitted. +;; +;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +;; DAMAGE. + +;;====================================================================== +;; ABOUT: +;; See README in the distribution at https://www.kiatoa.com/fossils/ulex +;; NOTES: +;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. +;; +;;====================================================================== + +(module ulex + * + #;( + + ;; NOTE: looking for the handler proc - find the run-listener :) + + run-listener ;; (run-listener handler-proc [port]) => uconn + + ;; NOTE: handler-proc params; + ;; (handler-proc rem-host-port qrykey cmd params) + + send-receive ;; (send-receive uconn host-port cmd data) + + ;; NOTE: cmd can be any plain text symbol except for these; + ;; 'ping 'ack 'goodbye 'response + + set-work-handler ;; (set-work-handler proc) + + wait-and-close ;; (wait-and-close uconn) + + ulex-listener? + + ;; needed to get the interface:port that was automatically found + udat-port + udat-host-port + + ;; for testing only + ;; pp-uconn + + ;; parameters + work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct + return-method ;; parameter; 'mailbox, 'polling, 'direct + ) + +(import scheme + chicken.base + chicken.file + chicken.io + chicken.time + chicken.condition + chicken.port + chicken.string + chicken.sort + chicken.pretty-print + chicken.tcp + + address-info + mailbox + matchable + ;; queues + regex + regex-case + simple-exceptions + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + system-information + ;; tcp6 + tcp-server + typed-records + + md5 + message-digest + (prefix base64 base64:) + z3 + ) + +;; udat struct, used by both caller and callee +;; instantiated as uconn by convention +;; +(defstruct udat + ;; the listener side + (port #f) + (host-port #f) ;; my host:port + (socket #f) + ;; the peers + (peers (make-hash-table)) ;; host:port->peer + ;; work handling + (work-queue (make-mailbox)) + (work-proc #f) ;; set by user + (cnum 0) ;; cookie number + (mboxes (make-hash-table)) ;; for the replies + (avail-cmboxes '()) ;; list of ( . ) for re-use + ;; threads + (numthreads 10) + (cmd-thread #f) + (work-queue-thread #f) + (num-threads-running 0) + ) + +;;====================================================================== +;; serialization +;; NOTE: I've had problems with read/write and s11n serialize, deserialize +;; thus the inefficient method here +;;====================================================================== + +(define serializing-method (make-parameter 'complex)) + + +;; NOTE: Can remove the regex and base64 encoding for zmq +(define (obj->string obj) + (case (serializing-method) + ((complex) + (string-substitute + (regexp "=") "_" + (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda ()(serialize obj))))) ;; BB: serialize - this is + ;; what causes problems + ;; between different builds of + ;; megatest communicating. + ;; serialize is sensitive to + ;; binary image of mtest. + #t)) + ((write)(with-output-to-string (lambda ()(write obj)))) + ((s11n) (with-output-to-string (lambda ()(serialize obj)))) + (else obj))) ;; rpc + +(define (string->obj msg #!key (transport 'http)) + (case (serializing-method) + ((complex) + (handle-exceptions + exn + (begin + (print "ULEX ERROR: cannot translate received data \""msg"\"") + (print-call-chain (current-error-port)) + msg) + (with-input-from-string + (z3:decode-buffer + (base64:base64-decode + (string-substitute + (regexp "_") "=" msg #t))) + (lambda ()(deserialize))))) + ((write)(with-input-from-string msg (lambda ()(read)))) + ((s11n)(with-input-from-string msg (lambda ()(deserialize)))) + (else msg))) ;; rpc + + +;;====================================================================== +;; listener +;;====================================================================== + +;; is uconn a ulex connector (listener) +;; +(define (ulex-listener? uconn) + (udat? uconn)) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (setup-listener uconn #!optional (port 4242)) + (handle-exceptions + exn + (if (< port 65535) + (setup-listener uconn (+ port 1)) + #f) + (connect-listener uconn port))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (udat-port-set! uconn port) + (udat-host-port-set! uconn (conc addr":"port)) + (udat-socket-set! uconn tlsn) + uconn)) + +;; run-listener does all the work of starting a listener in a thread +;; it then returns control +;; +(define (run-listener handler-proc #!optional (port-suggestion 4242)) + (let* ((uconn (make-udat))) + (udat-work-proc-set! uconn handler-proc) + (if (setup-listener uconn port-suggestion) + (let* ((orig-in (current-input-port)) + (orig-out (current-output-port))) + ((make-tcp-server + (udat-socket uconn) + (lambda () + (let* ((rdat + (string->obj (read)) + ;; (read in) + ;; (deserialize) + ) + (resp (let ((tcp-in (current-input-port)) + (tcp-out (current-output-port))) + (current-input-port orig-in) + (current-output-port orig-out) + (let ((res (do-work uconn rdat))) + (current-input-port tcp-in) + (current-output-port tcp-out) + res)))) + (write (obj->string resp)) + ;; (serialize resp) + ;; (write resp out) + ))))) + (assert #f "ERROR: run-listener called without proper setup.")))) + +(define (wait-and-close uconn) + (thread-join! (udat-cmd-thread uconn)) + (tcp-close (udat-socket uconn))) + +;;====================================================================== +;; == << ;; peers and connections +;; == << ;;====================================================================== + +(define *send-mutex* (make-mutex)) + +;; send structured data to recipient +;; +;; NOTE: qrykey is what was called the "cookie" previously +;; +;; retval tells send to expect and wait for return data (one line) and return it or time out +;; this is for ping where we don't want to necessarily have set up our own server yet. +;; +(define (send-receive udata host-port cmd params) + (let* ((host-port-lst (string-split host-port ":")) + (host (car host-port-lst)) + (port (string->number (cadr host-port-lst))) + (my-host-port (and udata (udat-host-port udata))) ;; remote will return to this + (isme (equal? host-port my-host-port)) ;; calling myself? + ;; dat is a self-contained work block that can be sent or handled locally + (dat (list my-host-port 'qrykey cmd params #;(cons (current-seconds)(current-milliseconds))))) + (cond + (isme (do-work udata dat)) ;; no transmission needed + (else + (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? + exn + (begin + (print "ULEX send-receive: "cmd", "params", exn="exn) + (message exn)) + (begin + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP + (let-values (((inp oup)(tcp-connect host port))) + (let ((res (if (and inp oup) + (begin + (write (obj->string dat) oup) + ;; (write dat oup) + ;; (serialize dat oup) + (close-output-port oup) + (string->obj (read inp)) + ;; (read inp) + ;; (deserialize inp) + ) + (begin + (print "ERROR: send called but no receiver has been setup. Please call setup first!") + #f)))) + ;; (close-output-port oup) + (close-input-port inp) + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + res)))))))) ;; res will always be 'ack unless return-method is direct + +;;====================================================================== +;; work queues - this is all happening on the listener side +;;====================================================================== + +;; move the logic to return the result somewhere else? +;; +(define (do-work uconn rdat) + ;; put this following into a do-work procedure + (match rdat + ((rem-host-port qrykey cmd params) + (case cmd + ((ping) 'ping-ack) ;; bypass calling the proc + (else + (let* ((proc (udat-work-proc uconn)) + (start-time (current-milliseconds)) + (result (with-output-to-port (current-error-port) + (lambda () + (proc rem-host-port qrykey cmd params)))) + (end-time (current-milliseconds)) + (run-time (- end-time start-time))) + result)))) + (else + (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))) + +;;====================================================================== +;; misc utils +;;====================================================================== + +(define (pp-uconn uconn) + (pp (udat->alist uconn))) + +;;====================================================================== +;; network utilities +;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + +(define (get-my-best-address) + (let ((all-my-addresses (get-all-ips))) + (cond + ((null? all-my-addresses) + (get-host-name)) ;; no interfaces? + ((eq? (length all-my-addresses) 1) + (car all-my-addresses)) ;; only one to choose from, just go with it + (else + (car (sort all-my-addresses ip-pref-less?)))))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips) + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +) ADDED ulex-trials/Makefile Index: ulex-trials/Makefile ================================================================== --- /dev/null +++ ulex-trials/Makefile @@ -0,0 +1,16 @@ +all : a b ulex-test + +ulex-test : ulex-test.scm ../ulex/ulex.scm + csc ulex-test.scm + +a : a.scm ../ulex/ulex.scm + csc a.scm + +b : b.scm ../ulex/ulex.scm + csc b.scm + +test : ulex-test + for x in $$(seq 9);do export NBFAKE_LOG=NBFAKE_$$x;sleep 1;nbfake ./ulex-test run 828$$x;echo $$cmd;$$cmd;done + +clean : + rm -f ulex-test .runners/* NBFAKE* ADDED ulex-trials/server-one.scm Index: ulex-trials/server-one.scm ================================================================== --- /dev/null +++ ulex-trials/server-one.scm @@ -0,0 +1,61 @@ +(import tcp-server format (chicken tcp) (chicken io) (chicken string) (prefix sqlite3 sqlite3:) sql-de-lite srfi-18 simple-exceptions mailbox s11n) +(let* ((work-mailbox (make-mailbox)) + (notify-mailbox (make-mailbox)) +(th1 (make-thread (lambda () + +((make-tcp-server + (tcp-listen 6505) + (lambda () + (let* ((db (sqlite3:open-database "test.db")) + (rec-data (deserialize))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + ;;(exec (sql db "INSERT INTO entries (received,send) VALUES (?,?);") "something" (conc "Server One Response: " "something else")) + (sqlite3:execute db "INSERT INTO entries (received,send) VALUES (?,?);" "something" (conc "Server One Response: " "something else")) + (mailbox-send! work-mailbox rec-data) + (format (current-error-port) (conc rec-data)) + (write-line (conc "Response to: " (conc rec-data))) + ;;(close-database db) + ))) +#t)) +"receive")) +(th2 (make-thread (lambda () + (print "Jeff is here") + (let loop ((entries 0)) + (thread-sleep! 0.8) + (print "Preparding to send entries" entries) + (handle-exceptions exn (begin (print "Had an issue: " (message exn))(thread-sleep! 10)) + (define-values (i o) (tcp-connect "localhost" 6504)) + (serialize (list "localhost:6505" "from-server-one") o) + (print (read-line i)) + (close-input-port i) + (close-output-port o)) + (loop (+ entries 1)))) "send")) +(th3 (make-thread (lambda () + (print "In mailbox thread") + (let loop2 ((entries2 0)) + (print "Processing: " (mailbox-receive! work-mailbox)) + (mailbox-send! notify-mailbox (list 'ack)) + (thread-sleep! 0.01) + (loop2 1))) "processing")) +(th4 (make-thread (lambda () + (print "In notify-mailbox thread") + (let loop3 ((entries3 0)) + (print "Notifying: " (mailbox-receive! notify-mailbox)) + (handle-exceptions exn (begin (print "Had an issue: " (message exn))(thread-sleep! 10)) + (define-values (i o) (tcp-connect "localhost" 6504)) + (serialize (list 'ack "from-server-one") o) + (print (read-line i)) + (close-input-port i) + (close-output-port o)) + ;;(thread-sleep! 1) + (loop3 1))) "notify")) + +) +(thread-start! th1) +(thread-start! th2) +(thread-start! th3) +(thread-start! th4) +(thread-join! th2) +) + +(print "Done here") ADDED ulex-trials/server-two.scm Index: ulex-trials/server-two.scm ================================================================== --- /dev/null +++ ulex-trials/server-two.scm @@ -0,0 +1,61 @@ +(import tcp-server format (chicken tcp) (chicken io) (chicken string) (prefix sqlite3 sqlite3:) sql-de-lite srfi-18 simple-exceptions mailbox s11n) +(let* ((work-mailbox (make-mailbox)) + (notify-mailbox (make-mailbox)) +(th1 (make-thread (lambda () + +((make-tcp-server + (tcp-listen 6504) + (lambda () + (let* ((db (sqlite3:open-database "test.db")) + (rec-data (deserialize))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + ;;(exec (sql db "INSERT INTO entries (received,send) VALUES (?,?);") "something" (conc "Server One Response: " "something else")) + (sqlite3:execute db "INSERT INTO entries (received,send) VALUES (?,?);" "something" (conc "Server One Response: " "something else")) + (mailbox-send! work-mailbox rec-data) + (format (current-error-port) (conc rec-data)) + (write-line (conc "Response to: " (conc rec-data))) + ;;(close-database db) + ))) +#t)) +"receive")) +(th2 (make-thread (lambda () + (print "Jeff is here") + (let loop ((entries 0)) + (thread-sleep! 0.8) + (print "Preparding to send entries" entries) + (handle-exceptions exn (begin (print "Had an issue: " (message exn))(thread-sleep! 10)) + (define-values (i o) (tcp-connect "localhost" 6505)) + (serialize (list "localhost:6505" "from-server-two") o) + (print (read-line i)) + (close-input-port i) + (close-output-port o)) + (loop (+ entries 1)))) "send")) +(th3 (make-thread (lambda () + (print "In mailbox thread") + (let loop2 ((entries2 0)) + (print "Processing: " (mailbox-receive! work-mailbox)) + (mailbox-send! notify-mailbox (list 'ack)) + (thread-sleep! 0.5) + (loop2 1))) "processing")) +(th4 (make-thread (lambda () + (print "In notify-mailbox thread") + (let loop3 ((entries3 0)) + (print "Notifying: " (mailbox-receive! notify-mailbox)) + (handle-exceptions exn (begin (print "Had an issue: " (message exn))(thread-sleep! 10)) + (define-values (i o) (tcp-connect "localhost" 6505)) + (serialize (list 'ack "from-server-two") o) + (print (read-line i)) + (close-input-port i) + (close-output-port o)) + ;;(thread-sleep! 1) + (loop3 1))) "notify")) + +) +(thread-start! th1) +(thread-start! th2) +(thread-start! th3) +(thread-start! th4) +(thread-join! th2) +) + +(print "Done here") ADDED ulex-trials/ulex-test.scm Index: ulex-trials/ulex-test.scm ================================================================== --- /dev/null +++ ulex-trials/ulex-test.scm @@ -0,0 +1,101 @@ +(include "../ulex/ulex.scm") + +(module ulex-test * + +(import scheme + (chicken io) + (chicken base) + (chicken time) + (chicken file) + (chicken file posix) + (chicken string) + (chicken process-context) + (chicken process-context posix) + miscmacros +;; nng + srfi-18 + srfi-69 + test + matchable + typed-records + system-information + directory-utils + + ulex + ) + +(define help "Usage: ulex-test COMMAND + where COMMAND is one of: + run host:port : start test server - start several in same dir +") + +(define (call uconn msg addr) + (print "Sent: "msg" to " addr ", received: " + (send-receive uconn addr 'hello msg))) + +;; start => hello 0 +;; hello 0 => hello 1 +;; hello 1 => hello 2 +;; ... +;; hello 11 => 'done +;; +(define (process-message mesg) + (print "In process-message") + (let ((parts (string-split mesg))) + (match + parts + ((msg c) + (let ((count (string->number c))) + (if (> count 10) + 'done + (conc msg " " (if count count 0))))) + ((msg) + (conc msg " 0")) + (else + "hello 0")))) + +(define (main) + (match + (command-line-arguments) + ((run myport) + ;; start listener + ;; put myaddr into file by host-pid in .runners + ;; for 1 minute + ;; get all in .runners + ;; call each with a message + ;; + (let* ((port (string->number myport)) + (endtimes (+ (current-seconds) 20)) ;; run for 20 seconds + (handler (lambda (rem-host-port qrykey cmd params) + (process-message params))) + (uconn (run-listener handler myport)) + (rfile (conc ".runners/"(get-host-name)"-"(current-process-id)))) + (if (not (and (file-exists? ".runners") + (directory? ".runners"))) + (create-directory ".runners" #t)) + (with-output-to-file rfile + (lambda () + (print myport))) + (let loop ((entries '())) + (if (> (current-seconds) endtimes) + (begin + (delete-file* rfile) + (sleep 1) + (exit)) + (if (null? entries) + (loop (glob ".runners/*")) + (let* ((entry (car entries)) + (destaddr (with-input-from-file entry read-line))) + (call uconn (conc "hello-from-"myport"to-"destaddr) (conc "localhost:" destaddr)) + ;; (thread-sleep! 0.025) + (loop (cdr entries)))))))) + ((cmd)(print "ERROR: command "cmd", not recognised.\n\n"help)) + (else + (print help)))) + +) ;; end module + +(import ulex-test) +(main) + + DELETED ulex.scm Index: ulex.scm ================================================================== --- ulex.scm +++ /dev/null @@ -1,24 +0,0 @@ -;;====================================================================== -;; Copyright 2019, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - -;;====================================================================== - -(declare (unit ulex)) -(declare (uses pkts)) - -(include "ulex/ulex.scm") ADDED ulex.scm.template Index: ulex.scm.template ================================================================== --- /dev/null +++ ulex.scm.template @@ -0,0 +1,23 @@ +;;====================================================================== +;; Copyright 2019, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit ulex)) + +(include "ulex-FLAVOR/ulex.scm") ADDED ulex/Makefile Index: ulex/Makefile ================================================================== --- /dev/null +++ ulex/Makefile @@ -0,0 +1,9 @@ +all : ulex.pdf ulex.png + +ulex.pdf : ulex.dot + dot -Tpdf ulex.dot -o ulex.pdf + +ulex.png : ulex.dot + dot -Tpng ulex.dot -o ulex.png + + ADDED ulex/dbmgr.scm Index: ulex/dbmgr.scm ================================================================== --- /dev/null +++ ulex/dbmgr.scm @@ -0,0 +1,1131 @@ +;;====================================================================== +;; Copyright 2022, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbmgrmod)) + +(declare (uses ulex)) +(declare (uses apimod)) +(declare (uses pkts)) +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses portloggermod)) +(declare (uses debugprint)) + +(module dbmgrmod + * + +(import scheme + chicken.base + chicken.condition + chicken.file + chicken.format + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + + (prefix sqlite3 sqlite3:) + matchable + md5 + message-digest + regex + s11n + srfi-1 + srfi-18 + srfi-69 + system-information + typed-records + + pkts + ulex + + commonmod + apimod + dbmod + debugprint + (prefix mtargs args:) + portloggermod + ) + +;; Configurations for server +;; (tcp-buffer-size 2048) +;; (max-connections 2048) + +;; info about me as a listener and my connections to db servers +;; stored (for now) in *db-serv-info* +;; +(defstruct servdat + (host #f) + (port #f) + (uuid #f) + (dbfile #f) + (uconn #f) ;; this is the listener *FOR THIS PROCESS* + (mode #f) + (status 'starting) + (trynum 0) ;; count the number of ports we've tried + (conns (make-hash-table)) ;; apath/dbname => conndat + ) + +(define *db-serv-info* (make-servdat)) + +(define (servdat->url sdat) + (conc (servdat-host sdat)":"(servdat-port sdat))) + +;; db servers contact info +;; +(defstruct conndat + (apath #f) + (dbname #f) + (fullname #f) + (hostport #f) + (ipaddr #f) + (port #f) + (srvpkt #f) + (srvkey #f) + (lastmsg 0) + (expires 0)) + +(define *srvpktspec* + `((server (host . h) + (port . p) + (servkey . k) + (pid . i) + (ipaddr . a) + (dbpath . d)))) + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +;; set up the api proc, seems like there should be a better place for this? +;; +;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE +;; +;; (define api-proc (make-parameter conc)) +;; (api-proc api:execute-requests) + +;; do we have a connection to apath dbname and +;; is it not expired? then return it +;; +;; else setup a connection +;; +;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception +;; +(define (rmt:get-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-ref/default (servdat-conns remdat) fullname #f))) + +(define (rmt:drop-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-delete! (servdat-conns remdat) fullname))) + +(define (rmt:find-main-server uconn apath dbname) + (let* ((pktsdir (get-pkts-dir apath)) + (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) + (viable-srvs (get-viable-servers all-srvpkts dbname))) + (get-the-server uconn apath viable-srvs))) + + +(define *connstart-mutex* (make-mutex)) +(define *last-main-start* 0) + +;; looks for a connection to main, returns if have and not exired +;; creates new otherwise +;; +;; connections for other servers happens by requesting from main +;; +;; TODO: This is unnecessarily re-creating the record in the hash table +;; +(define (rmt:open-main-connection remdat apath) + (let* ((fullpath (db:dbname->path apath ".db/main.db")) + (conns (servdat-conns remdat)) + (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this + (start-rmt:run (lambda () + (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server"))) + (thread-start! th1) + (thread-sleep! 1) + (let loop ((count 0)) + (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection") + (if (or (not *db-serv-info*) + (not (servdat-uconn *db-serv-info*))) + (begin + (thread-sleep! 1) + (loop (+ count 1))) + (begin + (servdat-mode-set! *db-serv-info* 'non-db) + (servdat-uconn *db-serv-info*))))))) + (myconn (servdat-uconn *db-serv-info*))) + (cond + ((not myconn) + (start-rmt:run) + (rmt:open-main-connection remdat apath)) + ((and conn ;; conn is NOT a socket, just saying ... + (< (current-seconds) (conndat-expires conn))) + #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died + ((and conn + (>= (current-seconds)(conndat-expires conn))) + (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") + (rmt:drop-conn remdat apath ".db/main.db") ;; + (rmt:open-main-connection remdat apath)) + (else + ;; Below we will find or create and connect to main + (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch") + (let* ((dbname (db:run-id->dbname #f)) + (the-srv (rmt:find-main-server myconn apath dbname)) + (start-main-srv (lambda () ;; call IF there is no the-srv found + (mutex-lock! *connstart-mutex*) + (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server + (begin + (api:run-server-process apath dbname) + (set! *last-main-start* (current-seconds)) + (thread-sleep! 1)) + (thread-sleep! 0.25)) + (mutex-unlock! *connstart-mutex*) + (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries + ))) + (if (not the-srv) ;; have server, try connecting to it + (start-main-srv) + (let* ((srv-addr (server-address the-srv)) ;; need serv + (ipaddr (alist-ref 'ipaddr the-srv)) + (port (alist-ref 'port the-srv)) + (srvkey (alist-ref 'servkey the-srv)) + (fullpath (db:dbname->path apath dbname)) + + (new-the-srv (make-conndat + apath: apath + dbname: dbname + fullname: fullpath + hostport: srv-addr + ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvpkt: the-srv + srvkey: srvkey ;; generated by rmt:get-signature on the server side + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2) ;; this needs to be gathered during the ping + ))) + (hash-table-set! conns fullpath new-the-srv))) + #t))))) + +;; NB// sinfo is a servdat struct +;; +(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5)) + (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") + (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable + (fullname (db:dbname->path apath dbname)) + (conns (servdat-conns sinfo)) + (mconn (rmt:get-conn sinfo apath ".db/main.db")) + (dconn (rmt:get-conn sinfo apath dbname))) + #;(if (and mconn + (not (debug:print-logger))) + (begin + (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") + (debug:print-logger rmt:log-to-main))) + (cond + ((and mconn + dconn + (< (current-seconds)(conndat-expires dconn))) + #t) ;; good to go + ((not mconn) ;; no channel open to main? open it... + (rmt:open-main-connection sinfo apath) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + ((not dconn) ;; no channel open to dbname? + (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname)))) + (case res + ((server-started) + (if (> num-tries 0) + (begin + (thread-sleep! 2) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + (begin + (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) + (exit 1)))) + (else + (if (list? res) ;; server has been registered and the info was returned. pass it on. + (begin ;; ("192.168.0.9" 53817 + ;; "5e34239f48e8973b3813221e54701a01" "24310" + ;; "192.168.0.9" + ;; "/home/matt/data/megatest/tests/simplerun" + ;; ".db/1.db") + (match + res + ((host port servkey pid ipaddr apath dbname) + (debug:print-info 0 *default-log-port* "got "res) + (hash-table-set! conns + fullname + (make-conndat + apath: apath + dbname: dbname + hostport: (conc host":"port) + ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvkey: servkey + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2)))) + (else + (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) + res) + (begin + (debug:print-info 0 *default-log-port* "Unexpected result: " res) + res))))))) + #t)) + +;;====================================================================== + +;; FOR DEBUGGING SET TO #t +;; (define *localmode* #t) +(define *localmode* #f) +(define *dbstruct* (make-dbr:dbstruct)) + +;; Defaults to current area +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (sinfo *db-serv-info*) + (dbname (db:run-id->dbname rid))) + (if *localmode* + (api:execute-requests *dbstruct* cmd params) + (begin + (rmt:open-main-connection sinfo apath) + (if rid (rmt:general-open-connection sinfo apath dbname)) + #;(if (not (member cmd '(log-to-main))) + (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) + (rmt:send-receive-real sinfo apath dbname cmd params))))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future +;; +(define (rmt:send-receive-real sinfo apath dbname cmd params) + (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.") + (let* ((cdat (rmt:get-conn sinfo apath dbname))) + (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn (conndat-hostport cdat) cmd params)) + #;(th1 (make-thread (lambda () + (set! res (send-receive uconn (conndat-hostport cdat) cmd params))) + "send-receive thread"))) + ;; (thread-start! th1) + ;; (thread-join! th1) ;; gratuitious thread stuff is so that mailbox is not used in primordial thead + ;; since we accessed the server we can bump the expires time up + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -2)) ;; two second margin for network time misalignments etc. + res))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future. +;; +;; Purpose - call the main.db server and request a server be started +;; for the given area path and dbname +;; + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================") + (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +;; host and port are used to ensure we are remove proper records +(define (rmt:server-shutdown host port) + (let ((dbfile (servdat-dbfile *db-serv-info*))) + (debug:print-info 0 *default-log-port* "dbfile is "dbfile) + (if dbfile + (let* ((am-server (args:get-arg "-server")) + (dbfile (args:get-arg "-db")) + (apath *toppath*) + #;(sinfo *remotedat*)) ;; foundation for future fix + (if *dbstruct-db* + (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) + (db (dbr:dbdat-db dbdat)) + (inmem (dbr:dbdat-db dbdat)) ;; WRONG + ) + ;; do a final sync here + (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) + (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) + ;; let's finalize here + (debug:print-info 0 *default-log-port* "Finalizing db and inmem") + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) + (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) + (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") + (if (string-match ".*/main.db$" dbfile) + (let ((pkt-file (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) + (delete-file* pkt-file) + (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) + (db:with-lock-db + (servdat-dbfile *db-serv-info*) + (lambda (dbh dbfile) + (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove + (let* ((sdat *db-serv-info*) ;; we have a run-id server + (host (servdat-host sdat)) + (port (servdat-port sdat)) + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) + (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) + ))))))) + + +(define (common:run-sync?) + ;; (and (common:on-homehost?) + (args:get-arg "-server")) + +(define *rmt:run-mutex* (make-mutex)) +(define *rmt:run-flag* #f) + +;; Main entry point to start a server. was start-server +(define (rmt:run hostn) + (mutex-lock! *rmt:run-mutex*) + (if *rmt:run-flag* + (begin + (debug:print-warn 0 *default-log-port* "rmt:run already running.") + (mutex-unlock! *rmt:run-mutex*)) + (begin + (set! *rmt:run-flag* #t) + (mutex-unlock! *rmt:run-mutex*) + ;; ;; Configurations for server + ;; (tcp-buffer-size 2048) + ;; (max-connections 2048) + (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") + (if (and *db-serv-info* + (servdat-uconn *db-serv-info*)) + (let* ((uconn (servdat-uconn *db-serv-info*))) + (wait-and-close uconn)) + (let* ((port (portlogger:open-run-close portlogger:find-port)) + (handler-proc (lambda (rem-host-port qrykey cmd params) ;; + (set! *db-last-access* (current-seconds)) + (assert (list? params) "FATAL: handler called with non-list params") + (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params) + (debug:print 0 *default-log-port* "handler call: "cmd", params="params) + (api:execute-requests *dbstruct-db* cmd params)))) + ;; (api:process-request *dbstuct-db* + (if (not *db-serv-info*) + (set! *db-serv-info* (make-servdat host: hostn port: port))) + (let* ((uconn (run-listener handler-proc port)) + (rport (udat-port uconn))) ;; the real port + (servdat-host-set! *db-serv-info* hostn) + (servdat-port-set! *db-serv-info* rport) + (servdat-uconn-set! *db-serv-info* uconn) + (wait-and-close uconn) + (db:print-current-query-stats) + ))) + (let* ((host (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (mode (or (servdat-mode *db-serv-info*) + "non-db"))) + ;; server exit stuff here + ;; (rmt:server-shutdown host port) - always do in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit + (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting") + )))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + + +;;====================================================================== +;; NEW SERVER METHOD +;;====================================================================== + +;; only use for main.db - need to re-write some of this :( +;; +(define (get-lock-db sdat dbfile host port) + (assert host "FATAL: get-lock-db called with host not set.") + (assert port "FATAL: get-lock-db called with port not set.") + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile host port)) + (uconn (servdat-uconn sdat))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? uconn (conc owner_host":"owner_port) "abc") + #f ;; locked by someone else + (begin ;; locked by someone dead and gone + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) + (sqlite3:finalize! dbh) + res)) + + +(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) + (let* ((pkt-dat `((host . ,host) + (port . ,port) + (servkey . ,servkey) + (pid . ,(current-process-id)) + (ipaddr . ,ipaddr) + (dbpath . ,dbpath))) + (uuid (write-alist->pkt + pkts-dir + pkt-dat + pktspec: pkt-spec + ptype: 'server))) + (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) + uuid)) + +(define (get-pkts-dir #!optional (apath #f)) + (let* ((effective-toppath (or *toppath* apath))) + (assert effective-toppath + "ERROR: get-pkts-dir called without *toppath* set. Exiting.") + (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) + (if (file-exists? pdir) + pdir + (begin + (handle-exceptions ;; this exception handler should NOT be needed but ... + exn + pdir + (create-directory pdir #t)) + pdir))))) + +;; given a pkts dir read +;; +(define (get-all-server-pkts pktsdir-in pktspec) + (let* ((pktsdir (if (file-exists? pktsdir-in) + pktsdir-in + (begin + (create-directory pktsdir-in #t) + pktsdir-in))) + (all-pkt-files (glob (conc pktsdir "/*.pkt")))) + (map (lambda (pkt-file) + (read-pkt->alist pkt-file pktspec: pktspec)) + all-pkt-files))) + +(define (server-address srv-pkt) + (conc (alist-ref 'host srv-pkt) ":" + (alist-ref 'port srv-pkt))) + +(define (server-ready? uconn host-port key) ;; server-address is host:port + (let* ((params `((cmd . ping)(key . ,key))) + (data `((cmd . ping) + (key . ,key) + (params . ,params))) ;; I don't get it. + (res (send-receive uconn host-port 'ping data))) + (if (eq? res 'ack) ;; yep, likely it is who we want on the other end + res + #f))) +;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f)))) + +; from the pkts return servers associated with dbpath +;; NOTE: Only one can be alive - have to check on each +;; in the list of pkts returned +;; +(define (get-viable-servers serv-pkts dbpath) + (let loop ((tail serv-pkts) + (res '())) + (if (null? tail) + res ;; NOTE: sort by age so oldest is considered first + (let* ((spkt (car tail))) + (loop (cdr tail) + (if (equal? dbpath (alist-ref 'dbpath spkt)) + (cons spkt res) + res)))))) + +(define (remove-pkts-if-not-alive uconn serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (host-port (conc host":"port)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (server-ready? uconn host-port key))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) + +;; from viable servers get one that is alive and ready +;; +(define (get-the-server uconn apath serv-pkts) + (let loop ((tail serv-pkts)) + (if (null? tail) + #f + (let* ((spkt (car tail)) + (host (alist-ref 'ipaddr spkt)) + (port (alist-ref 'port spkt)) + (host-port (conc host":"port)) + (dbpth (alist-ref 'dbpath spkt)) + (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt)) + (addr (server-address spkt))) + (if (server-ready? uconn host-port srvkey) + spkt + (loop (cdr tail))))))) + +;; am I the "first" in line server? I.e. my D card is smallest +;; use Z card as tie breaker +;; +(define (get-best-candidate serv-pkts dbpath) + (if (null? serv-pkts) + #f + (let loop ((tail serv-pkts) + (best (car serv-pkts))) + (if (null? tail) + best + (let* ((candidate (car tail)) + (candidate-bd (string->number (alist-ref 'D candidate))) + (best-bd (string->number (alist-ref 'D best))) + ;; bigger number is younger + (candidate-z (alist-ref 'Z candidate)) + (best-z (alist-ref 'Z best)) + (new-best (cond + ((> best-bd candidate-bd) ;; best is younger than candidate + candidate) + ((< best-bd candidate-bd) ;; candidate is younger than best + best) + (else + (if (string>=? best-z candidate-z) + best + candidate))))) ;; use Z card as tie breaker + (if (null? tail) + new-best + (loop (cdr tail) new-best))))))) + + +;;====================================================================== +;; END NEW SERVER METHOD +;;====================================================================== + +;; if .db/main.db check the pkts +;; +(define (rmt:wait-for-server pkts-dir db-file server-key) + (let* ((sdat *db-serv-info*)) + (let loop ((start-time (current-seconds)) + (changed #t) + (last-sdat "not this")) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *db-serv-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (> (- (current-seconds) start-time) 2)) + (let* ((uconn (servdat-uconn sdat))) + (servdat-status-set! sdat 'iface-stable) + (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server") + ;; create a server pkt in *toppath*/.meta/srvpkts + + ;; TODO: + ;; 1. change sdat to stuct + ;; 2. add uuid to struct + ;; 3. update uuid in sdat here + ;; + (servdat-uuid-set! sdat + (register-server + pkts-dir *srvpktspec* + (get-host-name) + (servdat-port sdat) server-key + (servdat-host sdat) db-file)) + ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key + ;; now read pkts and see if we are a contender + (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) + (viables (get-viable-servers all-pkts db-file)) + (alive (remove-pkts-if-not-alive uconn viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) + ;; am I the best-srv, compare server-keys to know + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (begin + (debug:print-info 0 *default-log-port* "I'm the server!") + (servdat-dbfile-set! sdat db-file) + (servdat-status-set! sdat 'db-locked)) + (begin + (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + (begin + (debug:print-info 0 *default-log-port* + "Keys do not match "best-srv-key", "server-key", exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + sdat)) + (begin ;; sdat not yet contains server info + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (sleep 4) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (exit)) + (loop start-time + (equal? sdat last-sdat) + sdat)))))))) + +(define (rmt:register-server sinfo apath iface port server-key dbname) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'register-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:get-count-servers sinfo apath) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) + +(define (rmt:deregister-server db-serv-info apath iface port server-key dbname) + (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db + (rmt:send-receive-real db-serv-info apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'deregister-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) + ;; wait until *db-serv-info* stops changing + (let* ((stime (current-seconds))) + (let loop ((last-host #f) + (last-port #f) + (tries 0)) + (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*))) + (curr-port (and *db-serv-info* (servdat-port *db-serv-info*)))) + ;; first we verify port and interface, update *db-serv-info* in need be. + (cond + ((> tries num-tries-allowed) + (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") + (exit 1)) + ((not *db-serv-info*) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not last-host)(not last-port)) + (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not (equal? last-host curr-host)) + (not (equal? last-port curr-port))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed + (thread-sleep! 0.5) + (loop curr-host curr-port (+ tries 1))) + (else + (rmt:get-signature) ;; sets *my-signature* as side effect + (servdat-status-set! *db-serv-info* 'interface-stable) + (debug:print 0 *default-log-port* + "SERVER STARTED: " curr-host + ":" curr-port + " AT " (current-seconds) " server signature: " *my-signature* + " with "(servdat-trynum *db-serv-info*)" port changes") + (flush-output *default-log-port*) + #t)))))) + +;; run rmt:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (rmt:keep-running dbname) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + + (let* ((sinfo *db-serv-info*) + (server-start-time (current-seconds)) + (pkts-dir (get-pkts-dir)) + (server-key (rmt:get-signature)) ;; This servers key + (is-main (equal? (args:get-arg "-db") ".db/main.db")) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (host port) + (set! *unclean-shutdown* #f) ;; Should not be needed anymore + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; (rmt:server-shutdown host port) -- called in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) + (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db")) + ;; main and run db servers have both got wait logic (could/should merge it) + (if is-main + (rmt:wait-for-server pkts-dir dbname server-key) + (rmt:wait-for-stable-interface)) + ;; this is our forever loop + (let* ((iface (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (uconn (servdat-uconn *db-serv-info*))) + (let loop ((count 0) + (bad-sync-count 0) + (start-time (current-milliseconds))) + (if (and (not is-main) + (common:low-noise-print 60 "servdat-status")) + (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*))) + + (mutex-lock! *heartbeat-mutex*) + ;; set up the database handle + (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate + (let ((watchdog (bdat-watchdog *bdat*))) + (debug:print 0 *default-log-port* "SERVER: dbprep") + (db:setup dbname) ;; sets *dbstruct-db* as side effect + (servdat-status-set! *db-serv-info* 'db-opened) + ;; IFF I'm not main, call into main and register self + (if (not is-main) + (let ((res (rmt:register-server sinfo + *toppath* iface port + server-key dbname))) + (if res ;; we are the server + (servdat-status-set! *db-serv-info* 'have-interface-and-db) + ;; now check that the db locker is alive, clear it out if not + (let* ((serv-info (rmt:server-info *toppath* dbname))) + (match serv-info + ((host port servkey pid ipaddr apath dbpath) + (if (not (server-ready? uconn (conc host":"port) servkey)) + (begin + (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") + (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) + (loop (+ count 1) bad-sync-count start-time)))) + (else + (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) + (exit))))))) + (debug:print 0 *default-log-port* + "SERVER: running, db "dbname" opened, megatest version: " + (common:get-full-version)) + ;; start the watchdog + + ;; is this really needed? + + #;(if watchdog + (if (not (member (thread-state watchdog) + '(ready running blocked + sleeping dead))) + (begin + (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")") + (thread-start! watchdog)) + (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")")) + (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it.")) + #;(loop (+ count 1) bad-sync-count start-time) + )) + + (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) + + (mutex-unlock! *heartbeat-mutex*) + + ;; when things go wrong we don't want to be doing the various + ;; queries too often so we strive to run this stuff only every + ;; four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (set! last-access *db-last-access*) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) bad-sync-count (current-milliseconds))) + + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence (get-host-name) port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port)) + ((and *server-run* + (or (not (timed-out?)) + (if is-main ;; do not exit if there are other servers (keep main open until all others gone) + (> (rmt:get-count-servers sinfo *toppath*) 1) + #f))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) + (loop 0 bad-sync-count (current-milliseconds))) + (else + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port) + #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " + (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown + (sexpr->string 'quit)))))))))) + +(define (rmt:get-reasonable-hostname) + (let* ((inhost (or (args:get-arg "-server") "-"))) + (if (equal? inhost "-") + (get-host-name) + inhost))) + +;; Call this to start the actual server +;; +;; all routes though here end in exit ... +;; +;; This is the point at which servers are started +;; +(define (rmt:server-launch dbname) + (debug:print-info 0 *default-log-port* "Entered rmt:server-launch") + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (rmt:run (rmt:get-reasonable-hostname))) + "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (if (args:get-arg "-server") + (rmt:keep-running dbname))) + "Keep running"))) + (thread-start! th2) + (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (thread-join! th3)) + #f) + +;;====================================================================== +;; S E R V E R - D I R E C T C A L L S +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server #f (list run-id))) + +(define (rmt:start-server run-id) + (rmt:send-receive 'start-server #f (list run-id))) + +(define (rmt:server-info apath dbname) + (rmt:send-receive 'get-server-info #f (list apath dbname))) + +;;====================================================================== +;; Nanomsg transport +;;====================================================================== + +#;(define (is-port-in-use port-num) + (let* ((ret #f)) + (let-values (((inp oup pid) + (process "netstat" (list "-tulpn" )))) + (let loop ((inl (read-line inp))) + (if (not (eof-object? inl)) + (begin + (if (string-search (regexp (conc ":" port-num)) inl) + (begin + ;(print "Output: " inl) + (set! ret #t)) + (loop (read-line inp))))))) + ret)) + +#;(define (open-nn-connection host-port) + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port))) + (nng-dial req uri) + (socket-set! req 'nng/recvtimeo 2000) + req)) + +#;(define (send-receive-nn req msg) + (nng-send req msg) + (nng-recv req)) + +#;(define (close-nn-connection req) + (nng-close! req)) + +;; ;; open connection to server, send message, close connection +;; ;; +;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds +;; (let ((req (make-req-socket 'req)) +;; (uri (conc "tcp://" host-port)) +;; (res #f) +;; ;; (contacts (alist-ref 'contact attrib)) +;; ;; (mode (alist-ref 'mode attrib)) +;; ) +;; (socket-set! req 'nng/recvtimeo 2000) +;; (handle-exceptions +;; exn +;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) +;; ;; Send notification +;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) +;; #f) +;; (nng-dial req uri) +;; ;; (print "Connected to the server " ) +;; (nng-send req msg) +;; ;; (print "Request Sent") +;; (let* ((th1 (make-thread (lambda () +;; (let ((resp (nng-recv req))) +;; (nng-close! req) +;; (set! res (if (equal? resp "ok") +;; #t +;; #f)))) +;; "recv thread")) +;; (th2 (make-thread (lambda () +;; (thread-sleep! timeout) +;; (thread-terminate! th1)) +;; "timer thread"))) +;; (thread-start! th1) +;; (thread-start! th2) +;; (thread-join! th1) +;; res)))) +;; +#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port)) + (res #f)) + (handle-exceptions + exn + (let ((emsg ((condition-property-accessor 'exn 'message) exn))) + ;; Send notification + (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn) + #f) + (nng-dial req uri) + (nng-send req msg) + (let* ((th1 (make-thread (lambda () + (let ((resp (nng-recv req))) + (nng-close! req) + ;; (print resp) + (set! res resp))) + "recv thread")) + (th2 (make-thread (lambda () + (thread-sleep! timeout) + (thread-terminate! th1)) + "timer thread"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + res)))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; run ping in separate process, safest way in some cases +;; +#;(define (server:ping-server ifaceport) + (with-input-from-pipe + (conc (common:get-megatest-exe) " -ping " ifaceport) + (lambda () + (let loop ((inl (read-line)) + (res "NOREPLY")) + (if (eof-object? inl) + (case (string->symbol res) + ((NOREPLY) #f) + ((LOGIN_OK) #t) + (else #f)) + (loop (read-line) inl)))))) + +;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; +#;(define (server:login toppath) + (lambda (toppath) + (set! *db-last-access* (current-seconds)) ;; might not be needed. + (if (equal? *toppath* toppath) + #t + #f))) + +;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; (define (server:release-sync-lock) +;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; (define (server:have-sync-lock?) +;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; (have-lock? (car have-lock-pair)) +;; (lock-time (cdr have-lock-pair)) +;; (lock-age (- (current-seconds) lock-time))) +;; (cond +;; (have-lock? #t) +;; ((>lock-age +;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; (server:release-sync-lock) +;; (server:have-sync-lock?)) +;; (else #f)))) + +) DELETED ulex/ulex-prev-orig.scm Index: ulex/ulex-prev-orig.scm ================================================================== --- ulex/ulex-prev-orig.scm +++ /dev/null @@ -1,2252 +0,0 @@ -;; ulex: Distributed sqlite3 db -;;; -;; Copyright (C) 2018 Matt Welland -;; Redistribution and use in source and binary forms, with or without -;; modification, is permitted. -;; -;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS -;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE -;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT -;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -;; DAMAGE. - -;;====================================================================== -;; ABOUT: -;; See README in the distribution at https://www.kiatoa.com/fossils/ulex -;; NOTES: -;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. -;; -;;====================================================================== - -(use mailbox) - -(module ulex - * - -(import scheme posix chicken data-structures ports extras files mailbox) -(import srfi-18 pkts matchable regex - typed-records srfi-69 srfi-1 - srfi-4 regex-case - (prefix sqlite3 sqlite3:) - foreign - tcp6 - ;; ulex-netutil - hostinfo - ) - -;; make it a global? Well, it is local to area module - -(define *captain-pktspec* - `((captain (host . h) - (port . p) - (pid . i) - (ipaddr . a) - ) - #;(data (hostname . h) ;; sender hostname - (port . p) ;; sender port - (ipaddr . a) ;; sender ip - (hostkey . k) ;; sending host key - store info at server under this key - (servkey . s) ;; server key - this needs to match at server end or reject the msg - (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json - (data . d) ;; base64 encoded slln data - ))) - -;; struct for keeping track of our world - -(defstruct udat - ;; captain info - (captain-address #f) - (captain-host #f) - (captain-port #f) - (captain-pid #f) - (captain-lease 0) ;; time (unix epoc) seconds when the lease is up - (ulex-dir (conc (get-environment-variable "HOME") "/.ulex")) - (cpkts-dir (conc (get-environment-variable "HOME") "/.ulex/pkts")) - (cpkt-spec *captain-pktspec*) - ;; this processes info - (my-cpkt-key #f) ;; put Z card here when I create a pkt for myself as captain - (my-address #f) - (my-hostname #f) - (my-port #f) - (my-pid (current-process-id)) - (my-dbs '()) - ;; server and handler thread - (serv-listener #f) ;; this processes server info - (handler-thread #f) - (mboxes (make-hash-table)) ;; key => mbox - ;; other servers - (peers (make-hash-table)) ;; host-port => peer record - (dbowners (make-hash-table)) ;; dbfile => host-port - (handlers (make-hash-table)) ;; dbfile => proc - ;; (outgoing-conns (make-hash-table)) ;; host:port -> conn - (work-queue (make-queue)) ;; most stuff goes here - ;; (fast-queue (make-queue)) ;; super quick stuff goes here (e.g. ping) - (busy #f) ;; is either of the queues busy, use to switch between queuing tasks or doing immediately - ;; app info - (appname #f) - (dbtypes (make-hash-table)) ;; this should be an alist but hash is easier. dbtype => [ initproc syncproc ] - ;; cookies - (cnum 0) ;; cookie num - ) - -;;====================================================================== -;; NEW APPROACH -;;====================================================================== - -;; start-server-find-port ;; gotta have a server port ready from the very begining - -;; udata - all the connection info, captain, server, ulex db etc. MUST BE PASSED IN -;; dbpath - full path and filename of the db to talk to or a symbol naming the db? -;; callname - the remote call to execute -;; params - parameters to pass to the remote call -;; -(define (remote-call udata dbpath dbtype callname . params) - (start-server-find-port udata) ;; ensure we have a local server - (find-or-setup-captain udata) - ;; look at connect, process-request, send, send-receive - (let-values (((cookie-key host-port)(get-db-owner udata dbpath dbtype))) - (send-receive udata host-port callname cookie-key params))) - -;;====================================================================== -;; KEY FUNCTIONS - THESE ARE TOO BE EXPOSED AND USED -;;====================================================================== - -;; connection setup and management functions - -;; This is the basic setup command. Must always be -;; called before connecting to a db using connect. -;; -;; find or become the captain -;; setup and return a ulex object -;; -(define (find-or-setup-captain udata) - ;; see if we already have a captain and if the lease is ok - (if (and (udat-captain-address udata) - (udat-captain-port udata) - (< (current-seconds) (udat-captain-lease udata))) - udata - (let* ((cpkts (get-all-captain-pkts udata)) ;; read captain pkts - (captn (get-winning-pkt cpkts))) - (if captn - (let* ((port (alist-ref 'port captn)) - (host (alist-ref 'host captn)) - (ipaddr (alist-ref 'ipaddr captn)) - (pid (alist-ref 'pid captn)) - (Z (alist-ref 'Z captn))) - (udat-captain-address-set! udata ipaddr) - (udat-captain-host-set! udata host) - (udat-captain-port-set! udata port) - (udat-captain-pid-set! udata pid) - (udat-captain-lease-set! udata (+ (current-seconds) 10)) - (let-values (((success pingtime)(ping udata (conc ipaddr ":" port)))) - (if success - udata - (begin - (print "Found unreachable captain at " ipaddr ":" port ", removing pkt") - (remove-captain-pkt udata captn) - (find-or-setup-captain udata)))) - (begin - (setup-as-captain udata) ;; this saves the thread to captain-thread and starts the thread - (find-or-setup-captain udata))))))) - -;; connect to a specific dbfile -;; - if already connected - return the dbowner host-port -;; - ask the captain who to talk to for this db -;; - put the entry in the dbowners hash as dbfile => host-port -;; -(define (connect udata dbfname dbtype) - (or (hash-table-ref/default (udat-dbowners udata) dbfname #f) - (let-values (((success dbowner-host-port)(get-db-owner udata dbfname dbtype))) - (if success - (begin - ;; just clobber the record, this is the new data no matter what - (hash-table-set! (udat-dbowners udata) dbfname dbowner-host-port) - dbowner-host-port) - #f)))) - -;; returns: success pingtime -;; -;; NOTE: causes the callee to store the info on this host along with the dbs this host currently owns -;; -(define (ping udata host-port) - (let* ((start (current-milliseconds)) - (cookie (make-cookie udata)) - (dbs (udat-my-dbs udata)) - (msg (string-intersperse dbs " ")) - (res (send udata host-port 'ping cookie msg retval: #t)) - (delta (- (current-milliseconds) start))) - (values (equal? res cookie) delta))) - -;; returns: success pingtime -;; -;; NOTE: causes all references to this worker to be wiped out in the -;; callee (ususally the captain) -;; -(define (goodbye-ping udata host-port) - (let* ((start (current-milliseconds)) - (cookie (make-cookie udata)) - (dbs (udat-my-dbs udata)) - (res (send udata host-port 'goodbye cookie "nomsg" retval: #t)) - (delta (- (current-milliseconds) start))) - (values (equal? res cookie) delta))) - -(define (goodbye-captain udata) - (let* ((host-port (udat-captain-host-port udata))) - (if host-port - (goodbye-ping udata host-port) - (values #f -1)))) - -(define (get-db-owner udata dbname dbtype) - (let* ((host-port (udat-captain-host-port udata))) - (if host-port - (let* ((cookie (make-cookie udata)) - (msg #f) ;; (conc dbname " " dbtype)) - (params `(,dbname ,dbtype)) - (res (send udata host-port 'db-owner cookie msg - params: params retval: #t))) - (match (string-split res) - ((retcookie owner-host-port) - (values (equal? retcookie cookie) owner-host-port)))) - (values #f -1)))) - -;; called in ulex-handler to dispatch work, called on the workers side -;; calls (proc params data) -;; returns result with cookie -;; -;; pdat is the info of the caller, used to send the result data -;; prockey is key into udat-handlers hash dereferencing a proc -;; procparam is a first param handed to proc - often to do further derefrencing -;; NOTE: params is intended to be a list of strings, encoding on data -;; is up to the user but data must be a single line -;; -(define (process-request udata pdat dbname cookie prockey procparam data) - (let* ((dbrec (ulex-open-db udata dbname)) ;; this will be a dbconn record, looks for in udata first - (proc (hash-table-ref udata prockey))) - (let* ((result (proc dbrec procparam data))) - result))) - -;; remote-request - send to remote to process in process-request -;; uconn comes from a call to connect and can be used instead of calling connect again -;; uconn is the host-port to call -;; we send dbname to the worker so they know which file to open -;; data must be a string with no newlines, it will be handed to the proc -;; at the remote site unchanged. It is up to the user to encode/decode it's contents -;; -;; rtype: immediate, read-only, normal, low-priority -;; -(define (remote-request udata uconn rtype dbname prockey procparam data) - (let* ((cookie (make-cookie udata))) - (send-receive udata uconn rtype cookie data `(,prockey procparam)))) - -(define (ulex-open-db udata dbname) - #f) - - -;;====================================================================== -;; Ulex db -;; -;; - track who is captain, lease expire time -;; - track who owns what db, lease -;; -;;====================================================================== - -;; -;; -(define (ulex-dbfname) - (let ((dbdir (conc (get-environment-variable "HOME") "/.ulex"))) - (if (not (file-exists? dbdir)) - (create-directory dbdir #t)) - (conc dbdir "/network.db"))) - -;; always goes in ~/.ulex/network.db -;; role is captain, adjutant, node -;; -(define (ulexdb-setup) - (let* ((dbfname (ulex-dbfname)) - (have-db (file-exists? dbfname)) - (db (sqlite3:open-database dbfname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (if (not have-db) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (stmt) - (if stmt (sqlite3:execute db stmt))) - `("CREATE TABLE IF NOT EXISTS nodes - (id INTEGER PRIMARY KEY, - role TEXT NOT NULL, - host TEXT NOT NULL, - port TEXT NOT NULL, - ipadr TEXT NOT NULL, - pid INTEGER NOT NULL, - zcard TEXT NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - lease_thru INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - "CREATE TRIGGER IF NOT EXISTS update_nodes_trigger AFTER UPDATE ON nodes - FOR EACH ROW - BEGIN - UPDATE nodes SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" - "CREATE TABLE IF NOT EXISTS dbs - (id INTEGER PRIMARY KEY, - dbname TEXT NOT NULL, - dbfile TEXT NOT NULL, - dbtype TEXT NOT NULL, - host_port TEXT NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - lease_thru INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - "CREATE TRIGGER IF NOT EXISTS update_dbs_trigger AFTER UPDATE ON dbs - FOR EACH ROW - BEGIN - UPDATE dbs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;"))))) - db)) - -(define (get-host-port-lease db dbfname) - (sqlite3:fold-row - (lambda (rem host-port lease-thru) - (list host-port lease-thru)) - #f db "SELECT host_port,lease_thru FROM dbs WHERE dbfile = ?" dbfname)) - -(define (register-captain db host ipadr port pid zcard #!key (lease 20)) - (let* ((dbfname (ulex-dbfname)) - (host-port (conc host ":" port))) - (sqlite3:with-transaction - db - (lambda () - (match (get-host-port-lease db dbfname) - ((host-port lease-thru) - (if (> (current-seconds) lease-thru) - (begin - (sqlite3:execute db "UPDATE dbs SET host_port=?,lease_thru=? WHERE dbname=?" - (conc host ":" port) - (+ (current-seconds) lease) - dbfname) - #t) - #f)) - (#f (sqlite3:execute db "INSERT INTO dbs (dbname,dbfile,dbtype,host_port,lease_thru) VALUES (?,?,?,?,?)" - "captain" dbfname "captain" host-port (+ (current-seconds) lease))) - (else (print "ERROR: Unrecognised result from fold-row") - (exit 1))))))) - -;;====================================================================== -;; network utilities -;;====================================================================== - -(define (rate-ip ipaddr) - (regex-case ipaddr - ( "^127\\..*" _ 0 ) - ( "^(10\\.0|192\\.168)\\..*" _ 1 ) - ( else 2 ) )) - -;; Change this to bias for addresses with a reasonable broadcast value? -;; -(define (ip-pref-less? a b) - (> (rate-ip a) (rate-ip b))) - - -(define (get-my-best-address) - (let ((all-my-addresses (get-all-ips)) - ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) - ) - (cond - ((null? all-my-addresses) - (get-host-name)) ;; no interfaces? - ((eq? (length all-my-addresses) 1) - (car all-my-addresses)) ;; only one to choose from, just go with it - - (else - (car (sort all-my-addresses ip-pref-less?))) - ;; (else - ;; (ip->string (car (filter (lambda (x) ;; take any but 127. - ;; (not (eq? (u8vector-ref x 0) 127))) - ;; all-my-addresses)))) - - ))) - -(define (get-all-ips-sorted) - (sort (get-all-ips) ip-pref-less?)) - -(define (get-all-ips) - (map ip->string (vector->list - (hostinfo-addresses - (host-information (current-hostname)))))) - -(define (udat-my-host-port udata) - (if (and (udat-my-address udata)(udat-my-port udata)) - (conc (udat-my-address udata) ":" (udat-my-port udata)) - #f)) - -(define (udat-captain-host-port udata) - (if (and (udat-captain-address udata)(udat-captain-port udata)) - (conc (udat-captain-address udata) ":" (udat-captain-port udata)) - #f)) - -(define (udat-get-peer udata host-port) - (hash-table-ref/default (udat-peers udata) host-port #f)) - -;; struct for keeping track of others we are talking to - -(defstruct peer - (addr-port #f) - (hostname #f) - (pid #f) - ;; (inp #f) - ;; (oup #f) - (dbs '()) ;; list of databases this peer is currently handling - ) - -(defstruct work - (peer-dat #f) - (handlerkey #f) - (qrykey #f) - (data #f) - (start (current-milliseconds))) - -#;(defstruct dbowner - (pdat #f) - (last-update (current-seconds))) - -;;====================================================================== -;; Captain functions -;;====================================================================== - -;; NB// This needs to be started in a thread -;; -;; setup to be a captain -;; - local server MUST be started already -;; - create pkt -;; - start server port handler -;; -(define (setup-as-captain udata) - (if (create-captain-pkt udata) - (let* ((my-addr (udat-my-address udata)) - (my-port (udat-my-port udata)) - (th (make-thread (lambda () - (ulex-handler-loop udata)) "Captain handler"))) - (udat-handler-thread-set! udata th) - (udat-captain-address-set! udata my-addr) - (udat-captain-port-set! udata my-port) - (thread-start! th)) - (begin - (print "ERROR: failed to create captain pkt") - #f))) - -;; given a pkts dir read -;; -(define (get-all-captain-pkts udata) - (let* ((pktsdir (let ((d (udat-cpkts-dir udata))) - (if (file-exists? d) - d - (begin - (create-directory d #t) - d)))) - (all-pkt-files (glob (conc pktsdir "/*.pkt"))) - (pkt-spec (udat-cpkt-spec udata))) - (map (lambda (pkt-file) - (read-pkt->alist pkt-file pktspec: pkt-spec)) - all-pkt-files))) - -;; sort by D then Z, return one, choose the oldest then -;; differentiate if needed using the Z key -;;l -(define (get-winning-pkt pkts) - (if (null? pkts) - #f - (car (sort pkts (lambda (a b) - (let ((ad (string->number (alist-ref 'D a))) - (bd (string->number (alist-ref 'D b)))) - (if (eq? a b) - (let ((az (alist-ref 'Z a)) - (bz (alist-ref 'Z b))) - (string>=? az bz)) - (> ad bd)))))))) - -;; put the host, ip, port and pid into a pkt in -;; the captain pkts dir -;; - assumes user has already fired up a server -;; which will be in the udata struct -;; -(define (create-captain-pkt udata) - (if (not (udat-serv-listener udata)) - (begin - (print "ERROR: create-captain-pkt called with out a listener") - #f) - (let* ((pktdat `((port . ,(udat-my-port udata)) - (host . ,(udat-my-hostname udata)) - (ipaddr . ,(udat-my-address udata)) - (pid . ,(udat-my-pid udata)))) - (pktdir (udat-cpkts-dir udata)) - (pktspec (udat-cpkt-spec udata)) - ) - (udat-my-cpkt-key-set! - udata - (write-alist->pkt - pktdir - pktdat - pktspec: pktspec - ptype: 'captain)) - (udat-my-cpkt-key udata)))) - -;; remove pkt associated with captn (the Z key .pkt) -;; -(define (remove-captain-pkt udata captn) - (let ((Z (alist-ref 'Z captn)) - (cpktdir (udat-cpkts-dir udata))) - (delete-file* (conc cpktdir "/" Z ".pkt")))) - -;; call all known peers and tell them to delete their info on the captain -;; thus forcing them to re-read pkts and connect to a new captain -;; call this when the captain needs to exit and if an older captain is -;; detected. Due to delays in sending file meta data in NFS multiple -;; captains can be initiated in a "Storm of Captains", book soon to be -;; on Amazon -;; -(define (drop-captain udata) - (let* ((peers (hash-table-keys (udat-peers udata))) - (cookie (make-cookie udata))) - (for-each - (lambda (host-port) - (send udata host-port 'dropcaptain cookie "nomsg" retval: #t)) - peers))) - -;;====================================================================== -;; server primitives -;;====================================================================== - -(define (make-cookie udata) - (let ((newcnum (+ (udat-cnum udata) 1))) - (udat-cnum-set! udata newcnum) - (conc (udat-my-address udata) ":" - (udat-my-port udata) "-" - (udat-my-pid udata) "-" - newcnum))) - -;; create a tcp listener and return a populated udat struct with -;; my port, address, hostname, pid etc. -;; return #f if fail to find a port to allocate. -;; -;; if udata-in is #f create the record -;; if there is already a serv-listener return the udata -;; -(define (start-server-find-port udata-in #!optional (port 4242)) - (let ((udata (or udata-in (make-udat)))) - (if (udat-serv-listener udata) ;; TODO - add check that the listener is alive and ready? - udata - (handle-exceptions - exn - (if (< port 65535) - (start-server-find-port udata (+ port 1)) - #f) - (connect-server udata port))))) - -(define (connect-server udata port) - ;; (tcp-listener-socket LISTENER)(socket-name so) - ;; sockaddr-address, sockaddr-port, sockaddr->string - (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) - (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) - (udat-my-address-set! udata addr) - (udat-my-port-set! udata port) - (udat-my-hostname-set! udata (get-host-name)) - (udat-serv-listener-set! udata tlsn) - udata)) - -(define (get-peer-dat udata host-port #!optional (hostname #f)(pid #f)) - (let* ((pdat (or (udat-get-peer udata host-port) - (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE SPECIFIC - exn - #f - (let ((npdat (make-peer addr-port: host-port))) - (if hostname (peer-hostname-set! npdat hostname)) - (if pid (peer-pid-set! npdat pid)) - npdat))))) - pdat)) - -;; send structured data to recipient -;; -;; NOTE: qrykey is what was called the "cookie" previously -;; -;; retval tells send to expect and wait for return data (one line) and return it or time out -;; this is for ping where we don't want to necessarily have set up our own server yet. -;; -(define (send udata host-port handler qrykey data - #!key (hostname #f)(pid #f)(params '())(retval #f)) - (let* ((my-host-port (udat-my-host-port udata)) - (isme (equal? host-port my-host-port)) ;; am I calling - ;; myself? - (dat (list - handler ;; " " - my-host-port ;; " " - (udat-my-pid udata) ;; " " - qrykey - params ;;(if (null? params) "" (conc " " - ;;(string-intersperse params " "))) - ))) - ;; (print "send isme is " (if isme "true!" "false!") ", - ;; my-host-port: " my-host-port ", host-port: " host-port) - (if isme - (ulex-handler udata dat data) - (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE - ;; SPECIFIC - exn - #f - (let-values (((inp oup)(tcp-connect host-port))) - ;; - ;; CONTROL LINE: - ;; handlerkey host:port pid qrykey params ... - ;; - (let ((res - (if (and inp oup) - (let* () - (if my-host-port - (begin - (write dat oup) - (write data oup) ;; send as sexpr - ;; (print "Sent dat: " dat " data: " data) - (if retval - (read inp) - #t)) - (begin - (print "ERROR: send called but no receiver has been setup. Please call setup first!") - #f)) - ;; NOTE: DO NOT BE TEMPTED TO LOOK AT ANY DATA ON INP HERE! - ;; (there is a listener for handling that) - ) - #f))) ;; #f means failed to connect and send - (close-input-port inp) - (close-output-port oup) - res)))))) - -;; send a request to the given host-port and register a mailbox in udata -;; wait for the mailbox data and return it -;; -(define (send-receive udata host-port handler qrykey data #!key (hostname #f)(pid #f)(params '())(timeout 20)) - (let ((mbox (make-mailbox)) - (mbox-time (current-milliseconds)) - (mboxes (udat-mboxes udata))) - (hash-table-set! mboxes qrykey mbox) - (if (send udata host-port handler qrykey data hostname: hostname pid: pid params: params) - (let* ((mbox-timeout-secs timeout) - (mbox-timeout-result 'MBOX_TIMEOUT) - (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) - (mbox-receive-time (current-milliseconds))) - (hash-table-delete! mboxes qrykey) - (if (eq? res 'MBOX_TIMEOUT) - #f - res)) - #f))) ;; #f means failed to communicate - -;; -(define (ulex-handler udata controldat data) - (print "controldat: " controldat " data: " data) - (match controldat ;; (string-split controldat) - ((handlerkey host-port pid qrykey params ...) - ;; (print "handlerkey: " handlerkey " host-port: " host-port " pid: " pid " qrykey: " qrykey " params: " params) - (case handlerkey ;; (string->symbol handlerkey) - ((ack)(print "Got ack!")) - ((ping) ;; special case - return result immediately on the same connection - (let* ((proc (hash-table-ref/default (udat-handlers udata) 'ping #f)) - (val (if proc (proc) "gotping")) - (peer (make-peer addr-port: host-port pid: pid)) - (dbshash (udat-dbowners udata))) - (peer-dbs-set! peer params) ;; params for ping is list of dbs owned by pinger - (for-each (lambda (dbfile) - (hash-table-set! dbshash dbfile host-port)) ;; WRONG? - params) ;; register each db in the dbshash - (if (not (hash-table-exists? (udat-peers udata) host-port)) - (hash-table-set! (udat-peers udata) host-port peer)) ;; save the details of this caller in peers - qrykey)) ;; End of ping - ((goodbye) - ;; remove all traces of the caller in db ownership etc. - (let* ((peer (hash-table-ref/default (udat-peers udata) host-port #f)) - (dbs (if peer (peer-dbs peer) '())) - (dbshash (udat-dbowners udata))) - (for-each (lambda (dbfile)(hash-table-delete! dbshash dbfile)) dbs) - (hash-table-delete! (udat-peers udata) host-port) - qrykey)) - ((dropcaptain) - ;; remove all traces of the captain - (udat-captain-address-set! udata #f) - (udat-captain-host-set! udata #f) - (udat-captain-port-set! udata #f) - (udat-captain-pid-set! udata #f) - qrykey) - ((rucaptain) ;; remote is asking if I'm the captain - (if (udat-my-cpkt-key udata) "yes" "no")) - ((db-owner) ;; given a db name who do I send my queries to - ;; look up the file in handlers, if have an entry ping them to be sure - ;; they are still alive and then return that host:port. - ;; if no handler found or if the ping fails pick from peers the oldest that - ;; is managing the fewest dbs - (match params - ((dbfile dbtype) - (let* ((owner-host-port (hash-table-ref/default (udat-dbowners udata) dbfile #f))) - (if owner-host-port - (conc qrykey " " owner-host-port) - (let* ((pdat (or (hash-table-ref/default (udat-peers udata) host-port #f) ;; no owner - caller gets to own it! - (make-peer addr-port: host-port pid: pid dbs: `(,dbfile))))) - (hash-table-set! (udat-peers udata) host-port pdat) - (hash-table-set! (udat-dbowners udata) dbfile host-port) - (conc qrykey " " host-port))))) - (else (conc qrykey " BADDATA")))) - ;; for work items: - ;; handler is one of; immediate, read-only, read-write, high-priority - ((immediate read-only normal low-priority) ;; do this work immediately - ;; host-port (caller), pid (caller), qrykey (cookie), params <= all from first line - ;; data => a single line encoded however you want, or should I build json into it? - (print "handlerkey=" handlerkey) - (let* ((pdat (get-peer-dat udata host-port))) - (match params ;; dbfile prockey procparam - ((dbfile prockey procparam) - (case handlerkey - ((immediate read-only) - (process-request udata pdat dbfile qrykey prockey procparam data)) - ((normal low-priority) ;; split off later and add logic to support low priority - (add-to-work-queue udata pdat dbfile qrykey prockey procparam data)) - (else - #f))) - (else - (print "INFO: params=" params " handlerkey=" handlerkey " controldat=" controldat) - #f)))) - (else - ;; (add-to-work-queue udata (get-peer-dat udata host-port) handlerkey qrykey data) - #f))) - (else - (print "BAD DATA? controldat=" controldat " data=" data) - #f)));; handles the incoming messages and dispatches to queues - -;; -(define (ulex-handler-loop udata) - (let* ((serv-listener (udat-serv-listener udata))) - ;; data comes as two lines - ;; handlerkey resp-addr:resp-port hostname pid qrykey [dbpath/dbfile.db] - ;; data - (let loop ((state 'start)) - (let-values (((inp oup)(tcp-accept serv-listener))) - (let* ((controldat (read inp)) - (data (read inp)) - (resp (ulex-handler udata controldat data))) - (if resp (write resp oup)) - (close-input-port inp) - (close-output-port oup)) - (loop state))))) - -;; add a proc to the handler list, these are done symetrically (i.e. in all instances) -;; so that the proc can be dereferenced remotely -;; -(define (register-handler udata key proc) - (hash-table-set! (udat-handlers udata) key proc)) - - -;;====================================================================== -;; work queues -;;====================================================================== - -(define (add-to-work-queue udata peer-dat handlerkey qrykey data) - (let ((wdat (make-work peer-dat: peer-dat handlerkey: handlerkey qrykey: qrykey data: data))) - (if (udat-busy udata) - (queue-add! (udat-work-queue udata) wdat) - (process-work udata wdat)) ;; passing in wdat tells process-work to first process the passed in wdat - )) - -(define (do-work udata wdat) - #f) - -(define (process-work udata #!optional wdat) - (if wdat (do-work udata wdat)) ;; process wdat - (let ((wqueue (udat-work-queue udata))) - (if (not (queue-empty? wqueue)) - (let loop ((wd (queue-remove! wqueue))) - (do-work udata wd) - (if (not (queue-empty? wqueue)) - (loop (queue-remove! wqueue))))))) - -;;====================================================================== -;; Generic db handling -;; setup a inmem db instance -;; open connection to on-disk db -;; sync on-disk db to inmem -;; get lock in on-disk db for dbowner of this db -;; put sync-proc, init-proc, on-disk handle, inmem handle in dbconn stuct -;; return the stuct -;;====================================================================== - -(defstruct dbconn - (fname #f) - (inmem #f) - (conn #f) - (sync #f) ;; sync proc - (init #f) ;; init proc - (lastsync (current-seconds)) - ) - -(defstruct dbinfo - (initproc #f) - (syncproc #f)) - -;; open inmem and disk database -;; init with initproc -;; return db struct -;; -;; appname; megatest, ulex or something else. -;; -(define (setup-db-connection udata fname-in appname dbtype) - (let* ((is-ulex (eq? appname 'ulex)) - (dbinf (if is-ulex ;; ulex is a built-in special case - (make-dbinfo initproc: ulexdb-init syncproc: ulexdb-sync) - (hash-table-ref/default (udat-dbtypes udata) dbtype #f))) - (initproc (dbinfo-initproc dbinf)) - (syncproc (dbinfo-syncproc dbinf)) - (fname (if is-ulex - (conc (udat-ulex-dir udata) "/ulex.db") - fname-in)) - (inmem-db (open-and-initdb udata #f 'inmem (dbinfo-initproc dbinf))) - (disk-db (open-and-initdb udata fname 'disk (dbinfo-initproc dbinf)))) - (make-dbconn inmem: inmem-db conn: disk-db sync: syncproc init: initproc))) - -;; dest='inmem or 'disk -;; -(define (open-and-initdb udata filename dest init-proc) - (let* ((inmem (eq? dest 'inmem)) - (dbfile (if inmem - ":INMEM:" - filename)) - (dbexists (if inmem #t (file-exists? dbfile))) - (db (sqlite3:open-database dbfile))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (if (not dbexists) - (init-proc db)) - db)) - - -;;====================================================================== -;; Previous Ulex db stuff -;;====================================================================== - -(define (ulexdb-init db inmem) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (stmt) - (if stmt (sqlite3:execute db stmt))) - `("CREATE TABLE IF NOT EXISTS processes - (id INTEGER PRIMARY KEY, - host TEXT NOT NULL, - ipadr TEXT NOT NULL, - port INTEGER NOT NULL, - pid INTEGER NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - (if inmem - "CREATE TRIGGER IF NOT EXISTS update_proces_trigger AFTER UPDATE ON processes - FOR EACH ROW - BEGIN - UPDATE processes SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" - #f)))))) - -;; open databases, do initial sync -(define (ulexdb-sync dbconndat udata) - #f) - - -) ;; END OF ULEX - - -;;; ;;====================================================================== -;;; ;; D E B U G H E L P E R S -;;; ;;====================================================================== -;;; -;;; (define (dbg> . args) -;;; (with-output-to-port (current-error-port) -;;; (lambda () -;;; (apply print "dbg> " args)))) -;;; -;;; (define (debug-pp . args) -;;; (if (get-environment-variable "ULEX_DEBUG") -;;; (with-output-to-port (current-error-port) -;;; (lambda () -;;; (apply pp args))))) -;;; -;;; (define *default-debug-port* (current-error-port)) -;;; -;;; (define (sdbg> fn stage-name stage-start stage-end start-time . message) -;;; (if (get-environment-variable "ULEX_DEBUG") -;;; (with-output-to-port *default-debug-port* -;;; (lambda () -;;; (apply print "ulex:" fn " " stage-name " took " (- (if stage-end stage-end (current-milliseconds)) stage-start) " ms. " -;;; (if start-time -;;; (conc "total time " (- (current-milliseconds) start-time) -;;; " ms.") -;;; "") -;;; message -;;; ))))) - -;;====================================================================== -;; M A C R O S -;;====================================================================== -;; iup callbacks are not dumping the stack, this is a work-around -;; - -;; Some of these routines use: -;; -;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html -;; -;; Syntax for defining macros in a simple style similar to function definiton, -;; when there is a single pattern for the argument list and there are no keywords. -;; -;; (define-simple-syntax (name arg ...) body ...) -;; -;; -;; (define-syntax define-simple-syntax -;; (syntax-rules () -;; ((_ (name arg ...) body ...) -;; (define-syntax name (syntax-rules () ((name arg ...) (begin body ...))))))) -;; -;; (define-simple-syntax (catch-and-dump proc procname) -;; (handle-exceptions -;; exn -;; (begin -;; (print-call-chain (current-error-port)) -;; (with-output-to-port (current-error-port) -;; (lambda () -;; (print ((condition-property-accessor 'exn 'message) exn)) -;; (print "Callback error in " procname) -;; (print "Full condition info:\n" (condition->list exn))))) -;; (proc))) -;; -;; -;;====================================================================== -;; R E C O R D S -;;====================================================================== - -;;; ;; information about me as a server -;;; ;; -;;; (defstruct area -;;; ;; about this area -;;; (useportlogger #f) -;;; (lowport 32768) -;;; (server-type 'auto) ;; auto=create up to five servers/pkts, main=create pkts, passive=no pkt (unless there are no pkts at all) -;;; (conn #f) -;;; (port #f) -;;; (myaddr (get-my-best-address)) -;;; pktid ;; get pkt from hosts table if needed -;;; pktfile -;;; pktsdir -;;; dbdir -;;; (dbhandles (make-hash-table)) ;; fname => list-of-dbh, NOTE: Should really never need more than one? -;;; (mutex (make-mutex)) -;;; (rtable (make-hash-table)) ;; registration table of available actions -;;; (dbs (make-hash-table)) ;; filename => random number, used for choosing what dbs I serve -;;; ;; about other servers -;;; (hosts (make-hash-table)) ;; key => hostdat -;;; (hoststats (make-hash-table)) ;; key => alist of fname => ( qcount . qtime ) -;;; (reqs (make-hash-table)) ;; uri => queue -;;; ;; work queues -;;; (wqueues (make-hash-table)) ;; fname => qdat -;;; (stats (make-hash-table)) ;; fname => totalqueries -;;; (last-srvup (current-seconds)) ;; last time we updated the known servers -;;; (cookie2mbox (make-hash-table)) ;; map cookie for outstanding request to mailbox of awaiting call -;;; (ready #f) -;;; (health (make-hash-table)) ;; ipaddr:port => num failed pings since last good ping -;;; ) -;;; -;;; ;; host stats -;;; ;; -;;; (defstruct hostdat -;;; (pkt #f) -;;; (dbload (make-hash-table)) ;; "dbfile.db" => queries/min -;;; (hostload #f) ;; normalized load ( 5min load / numcpus ) -;;; ) -;;; -;;; ;; dbdat -;;; ;; -;;; (defstruct dbdat -;;; (dbh #f) -;;; (fname #f) -;;; (write-access #f) -;;; (sths (make-hash-table)) ;; hash mapping query strings to handles -;;; ) -;;; -;;; ;; qdat -;;; ;; -;;; (defstruct qdat -;;; (writeq (make-queue)) -;;; (readq (make-queue)) -;;; (rwq (make-queue)) -;;; (logq (make-queue)) ;; do we need a queue for logging? yes, if we use sqlite3 db for logging -;;; (osshort (make-queue)) -;;; (oslong (make-queue)) -;;; (misc (make-queue)) ;; used for things like ping-full -;;; ) -;;; -;;; ;; calldat -;;; ;; -;;; (defstruct calldat -;;; (ctype 'dbwrite) -;;; (obj #f) ;; this would normally be an SQL statement e.g. SELECT, INSERT etc. -;;; (rtime (current-milliseconds))) -;;; -;;; ;; make it a global? Well, it is local to area module -;;; -;;; (define *pktspec* -;;; `((server (hostname . h) -;;; (port . p) -;;; (pid . i) -;;; (ipaddr . a) -;;; ) -;;; (data (hostname . h) ;; sender hostname -;;; (port . p) ;; sender port -;;; (ipaddr . a) ;; sender ip -;;; (hostkey . k) ;; sending host key - store info at server under this key -;;; (servkey . s) ;; server key - this needs to match at server end or reject the msg -;;; (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json -;;; (data . d) ;; base64 encoded slln data -;;; ))) -;;; -;;; ;; work item -;;; ;; -;;; (defstruct witem -;;; (rhost #f) ;; return host -;;; (ripaddr #f) ;; return ipaddr -;;; (rport #f) ;; return port -;;; (servkey #f) ;; the packet representing the client of this workitem, used by final send-message -;;; (rdat #f) ;; the request - usually an sql query, type is rdat -;;; (action #f) ;; the action: immediate, dbwrite, dbread,oslong, osshort -;;; (cookie #f) ;; cookie id for response -;;; (data #f) ;; the data payload, i.e. parameters -;;; (result #f) ;; the result from processing the data -;;; (caller #f)) ;; the calling peer according to rpc itself -;;; -;;; (define (trim-pktid pktid) -;;; (if (string? pktid) -;;; (substring pktid 0 4) -;;; "nopkt")) -;;; -;;; (define (any->number num) -;;; (cond -;;; ((number? num) num) -;;; ((string? num) (string->number num)) -;;; (else num))) -;;; -;;; (use trace) -;;; (trace-call-sites #t) -;;; -;;; ;;====================================================================== -;;; ;; D A T A B A S E H A N D L I N G -;;; ;;====================================================================== -;;; -;;; ;; look in dbhandles for a db, return it, else return #f -;;; ;; -;;; (define (get-dbh acfg fname) -;;; (let ((dbh-lst (hash-table-ref/default (area-dbhandles acfg) fname '()))) -;;; (if (null? dbh-lst) -;;; (begin -;;; ;; (print "opening db for " fname) -;;; (open-db acfg fname)) ;; Note that the handles get put back in the queue in the save-dbh calls -;;; (let ((rem-lst (cdr dbh-lst))) -;;; ;; (print "re-using saved connection for " fname) -;;; (hash-table-set! (area-dbhandles acfg) fname rem-lst) -;;; (car dbh-lst))))) -;;; -;;; (define (save-dbh acfg fname dbdat) -;;; ;; (print "saving dbh for " fname) -;;; (hash-table-set! (area-dbhandles acfg) fname (cons dbdat (hash-table-ref/default (area-dbhandles acfg) fname '())))) -;;; -;;; ;; open the database, if never before opened init it. put the handle in the -;;; ;; open db's hash table -;;; ;; returns: the dbdat -;;; ;; -;;; (define (open-db acfg fname) -;;; (let* ((fullname (conc (area-dbdir acfg) "/" fname)) -;;; (exists (file-exists? fullname)) -;;; (write-access (if exists -;;; (file-write-access? fullname) -;;; (file-write-access? (area-dbdir acfg)))) -;;; (db (sqlite3:open-database fullname)) -;;; (handler (sqlite3:make-busy-timeout 136000)) -;;; ) -;;; (sqlite3:set-busy-handler! db handler) -;;; (sqlite3:execute db "PRAGMA synchronous = 0;") -;;; (if (not exists) ;; need to init the db -;;; (if write-access -;;; (let ((isql (get-rsql acfg 'dbinitsql))) ;; get the init sql statements -;;; ;; (sqlite3:with-transaction -;;; ;; db -;;; ;; (lambda () -;;; (if isql -;;; (for-each -;;; (lambda (sql) -;;; (sqlite3:execute db sql)) -;;; isql))) -;;; (print "ERROR: no write access to " (area-dbdir acfg)))) -;;; (make-dbdat dbh: db fname: fname write-access: write-access))) -;;; -;;; ;; This is a low-level command to retrieve or to prepare, save and return a prepared statment -;;; ;; you must extract the db handle -;;; ;; -;;; (define (get-sth db cache stmt) -;;; (if (hash-table-exists? cache stmt) -;;; (begin -;;; ;; (print "Reusing cached stmt for " stmt) -;;; (hash-table-ref/default cache stmt #f)) -;;; (let ((sth (sqlite3:prepare db stmt))) -;;; (hash-table-set! cache stmt sth) -;;; ;; (print "prepared stmt for " stmt) -;;; sth))) -;;; -;;; ;; a little more expensive but does all the tedious deferencing - only use if you don't already -;;; ;; have dbdat and db sitting around -;;; ;; -;;; (define (full-get-sth acfg fname stmt) -;;; (let* ((dbdat (get-dbh acfg fname)) -;;; (db (dbdat-dbh dbdat)) -;;; (sths (dbdat-sths dbdat))) -;;; (get-sth db sths stmt))) -;;; -;;; ;; write to a db -;;; ;; acfg: area data -;;; ;; rdat: request data -;;; ;; hdat: (host . port) -;;; ;; -;;; ;; (define (dbwrite acfg rdat hdat data-in) -;;; ;; (let* ((dbname (car data-in)) -;;; ;; (dbdat (get-dbh acfg dbname)) -;;; ;; (db (dbdat-dbh dbdat)) -;;; ;; (sths (dbdat-sths dbdat)) -;;; ;; (stmt (calldat-obj rdat)) -;;; ;; (sth (get-sth db sths stmt)) -;;; ;; (data (cdr data-in))) -;;; ;; (print "dbname: " dbname " acfg: " acfg " rdat: " (calldat->alist rdat) " hdat: " hdat " data: " data) -;;; ;; (print "dbdat: " (dbdat->alist dbdat)) -;;; ;; (apply sqlite3:execute sth data) -;;; ;; (save-dbh acfg dbname dbdat) -;;; ;; #t -;;; ;; )) -;;; -;;; (define (finalize-all-db-handles acfg) -;;; (let* ((dbhandles (area-dbhandles acfg)) ;; dbhandles is hash of fname ==> dbdat -;;; (num 0)) -;;; (for-each -;;; (lambda (area-name) -;;; (print "Closing handles for " area-name) -;;; (let ((dbdats (hash-table-ref/default dbhandles area-name '()))) -;;; (for-each -;;; (lambda (dbdat) -;;; ;; first close all statement handles -;;; (for-each -;;; (lambda (sth) -;;; (sqlite3:finalize! sth) -;;; (set! num (+ num 1))) -;;; (hash-table-values (dbdat-sths dbdat))) -;;; ;; now close the dbh -;;; (set! num (+ num 1)) -;;; (sqlite3:finalize! (dbdat-dbh dbdat))) -;;; dbdats))) -;;; (hash-table-keys dbhandles)) -;;; (print "FINALIZED " num " dbhandles"))) -;;; -;;; ;;====================================================================== -;;; ;; W O R K Q U E U E H A N D L I N G -;;; ;;====================================================================== -;;; -;;; (define (register-db-as-mine acfg dbname) -;;; (let ((ht (area-dbs acfg))) -;;; (if (not (hash-table-ref/default ht dbname #f)) -;;; (hash-table-set! ht dbname (random 10000))))) -;;; -;;; (define (work-queue-add acfg fname witem) -;;; (let* ((work-queue-start (current-milliseconds)) -;;; (action (witem-action witem)) ;; NB the action is the index into the rdat actions -;;; (qdat (or (hash-table-ref/default (area-wqueues acfg) fname #f) -;;; (let ((newqdat (make-qdat))) -;;; (hash-table-set! (area-wqueues acfg) fname newqdat) -;;; newqdat))) -;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f))) -;;; (if rdat -;;; (queue-add! -;;; (case (calldat-ctype rdat) -;;; ((dbwrite) (register-db-as-mine acfg fname)(qdat-writeq qdat)) -;;; ((dbread) (register-db-as-mine acfg fname)(qdat-readq qdat)) -;;; ((dbrw) (register-db-as-mine acfg fname)(qdat-rwq qdat)) -;;; ((oslong) (qdat-oslong qdat)) -;;; ((osshort) (qdat-osshort qdat)) -;;; ((full-ping) (qdat-misc qdat)) -;;; (else -;;; (print "ERROR: no queue for " action ". Adding to dbwrite queue.") -;;; (qdat-writeq qdat))) -;;; witem) -;;; (case action -;;; ((full-ping)(qdat-misc qdat)) -;;; (else -;;; (print "ERROR: No action " action " was registered")))) -;;; (sdbg> "work-queue-add" "queue-add" work-queue-start #f #f) -;;; #t)) ;; for now, simply return #t to indicate request got to the queue -;;; -;;; (define (doqueue acfg q fname dbdat dbh) -;;; ;; (print "doqueue: " fname) -;;; (let* ((start-time (current-milliseconds)) -;;; (qlen (queue-length q))) -;;; (if (> qlen 1) -;;; (print "Processing queue of length " qlen)) -;;; (let loop ((count 0) -;;; (responses '())) -;;; (let ((delta (- (current-milliseconds) start-time))) -;;; (if (or (queue-empty? q) -;;; (> delta 400)) ;; stop working on this queue after 400ms have passed -;;; (list count delta responses) ;; return count, delta and responses list -;;; (let* ((witem (queue-remove! q)) -;;; (action (witem-action witem)) -;;; (rdat (witem-rdat witem)) -;;; (stmt (calldat-obj rdat)) -;;; (sth (full-get-sth acfg fname stmt)) -;;; (ctype (calldat-ctype rdat)) -;;; (data (witem-data witem)) -;;; (cookie (witem-cookie witem))) -;;; ;; do the processing and save the result in witem-result -;;; (witem-result-set! -;;; witem -;;; (case ctype ;; action -;;; ((noblockwrite) ;; blind write, no ack of success returned -;;; (apply sqlite3:execute sth data) -;;; (sqlite3:last-insert-rowid dbh)) -;;; ((dbwrite) ;; blocking write -;;; (apply sqlite3:execute sth data) -;;; #t) -;;; ((dbread) ;; TODO: consider breaking this up and shipping in pieces for large query -;;; (apply sqlite3:map-row (lambda x x) sth data)) -;;; ((full-ping) 'full-ping) -;;; (else (print "Not ready for action " action) #f))) -;;; (loop (add1 count) -;;; (if cookie -;;; (cons witem responses) -;;; responses)))))))) -;;; -;;; ;; do up to 400ms of processing on each queue -;;; ;; - the work-queue-processor will allow the max 1200ms of work to complete but it will flag as overloaded -;;; ;; -;;; (define (process-db-queries acfg fname) -;;; (if (hash-table-exists? (area-wqueues acfg) fname) -;;; (let* ((process-db-queries-start-time (current-milliseconds)) -;;; (qdat (hash-table-ref/default (area-wqueues acfg) fname #f)) -;;; (queue-sym->queue (lambda (queue-sym) -;;; (case queue-sym ;; lookup the queue from qdat given a name (symbol) -;;; ((wqueue) (qdat-writeq qdat)) -;;; ((rqueue) (qdat-readq qdat)) -;;; ((rwqueue) (qdat-rwq qdat)) -;;; ((misc) (qdat-misc qdat)) -;;; (else #f)))) -;;; (dbdat (get-dbh acfg fname)) -;;; (dbh (if (dbdat? dbdat)(dbdat-dbh dbdat) #f)) -;;; (nowtime (current-seconds))) -;;; ;; handle the queues that require a transaction -;;; ;; -;;; (map ;; -;;; (lambda (queue-sym) -;;; ;; (print "processing queue " queue-sym) -;;; (let* ((queue (queue-sym->queue queue-sym))) -;;; (if (not (queue-empty? queue)) -;;; (let ((responses -;;; (sqlite3:with-transaction ;; todo - catch exceptions... -;;; dbh -;;; (lambda () -;;; (let* ((res (doqueue acfg queue fname dbdat dbh))) ;; this does the work! -;;; ;; (print "res=" res) -;;; (match res -;;; ((count delta responses) -;;; (update-stats acfg fname queue-sym delta count) -;;; (sdbg> "process-db-queries" "sqlite3-transaction" process-db-queries-start-time #f #f) -;;; responses) ;; return responses -;;; (else -;;; (print "ERROR: bad return data from doqueue " res))) -;;; ))))) -;;; ;; having completed the transaction, send the responses. -;;; ;; (print "INFO: sending " (length responses) " responses.") -;;; (let loop ((responses-left responses)) -;;; (cond -;;; ((null? responses-left) #t) -;;; (else -;;; (let* ((witem (car responses-left)) -;;; (response (cdr responses-left))) -;;; (call-deliver-response acfg (witem-ripaddr witem)(witem-rport witem) -;;; (witem-cookie witem)(witem-result witem))) -;;; (loop (cdr responses-left)))))) -;;; ))) -;;; '(wqueue rwqueue rqueue)) -;;; -;;; ;; handle misc queue -;;; ;; -;;; ;; (print "processing misc queue") -;;; (let ((queue (queue-sym->queue 'misc))) -;;; (doqueue acfg queue fname dbdat dbh)) -;;; ;; .... -;;; (save-dbh acfg fname dbdat) -;;; #t ;; just to let the tests know we got here -;;; ) -;;; #f ;; nothing processed -;;; )) -;;; -;;; ;; run all queues in parallel per db but sequentially per queue for that db. -;;; ;; - process the queues every 500 or so ms -;;; ;; - allow for long running queries to continue but all other activities for that -;;; ;; db will be blocked. -;;; ;; -;;; (define (work-queue-processor acfg) -;;; (let* ((threads (make-hash-table))) ;; fname => thread -;;; (let loop ((fnames (hash-table-keys (area-wqueues acfg))) -;;; (target-time (+ (current-milliseconds) 50))) -;;; ;;(if (not (null? fnames))(print "Processing for these databases: " fnames)) -;;; (for-each -;;; (lambda (fname) -;;; ;; (print "processing for " fname) -;;; ;;(process-db-queries acfg fname)) -;;; (let ((th (hash-table-ref/default threads fname #f))) -;;; (if (and th (not (member (thread-state th) '(dead terminated)))) -;;; (begin -;;; (print "WARNING: worker thread for " fname " is taking a long time.") -;;; (print "Thread is in state " (thread-state th))) -;;; (let ((th1 (make-thread (lambda () -;;; ;; (catch-and-dump -;;; ;; (lambda () -;;; ;; (print "Process queries for " fname) -;;; (let ((start-time (current-milliseconds))) -;;; (process-db-queries acfg fname) -;;; ;; (thread-sleep! 0.01) ;; need the thread to take at least some time -;;; (hash-table-delete! threads fname)) ;; no mutexes? -;;; fname) -;;; "th1"))) ;; )) -;;; (hash-table-set! threads fname th1) -;;; (thread-start! th1))))) -;;; fnames) -;;; ;; (thread-sleep! 0.1) ;; give the threads some time to process requests -;;; ;; burn time until 400ms is up -;;; (let ((now-time (current-milliseconds))) -;;; (if (< now-time target-time) -;;; (let ((delta (- target-time now-time))) -;;; (thread-sleep! (/ delta 1000))))) -;;; (loop (hash-table-keys (area-wqueues acfg)) -;;; (+ (current-milliseconds) 50))))) -;;; -;;; ;;====================================================================== -;;; ;; S T A T S G A T H E R I N G -;;; ;;====================================================================== -;;; -;;; (defstruct stat -;;; (qcount-avg 0) ;; coarse running average -;;; (qtime-avg 0) ;; coarse running average -;;; (qcount 0) ;; total -;;; (qtime 0) ;; total -;;; (last-qcount 0) ;; last -;;; (last-qtime 0) ;; last -;;; (dbs '()) ;; list of db files handled by this node -;;; (when 0)) ;; when the last query happened - seconds -;;; -;;; -;;; (define (update-stats acfg fname bucket duration numqueries) -;;; (let* ((key fname) ;; for now do not use bucket. Was: (conc fname "-" bucket)) ;; lazy but good enough -;;; (stats (or (hash-table-ref/default (area-stats acfg) key #f) -;;; (let ((newstats (make-stat))) -;;; (hash-table-set! (area-stats acfg) key newstats) -;;; newstats)))) -;;; ;; when the last query happended (used to remove the fname from the active list) -;;; (stat-when-set! stats (current-seconds)) -;;; ;; last values -;;; (stat-last-qcount-set! stats numqueries) -;;; (stat-last-qtime-set! stats duration) -;;; ;; total over process lifetime -;;; (stat-qcount-set! stats (+ (stat-qcount stats) numqueries)) -;;; (stat-qtime-set! stats (+ (stat-qtime stats) duration)) -;;; ;; coarse average -;;; (stat-qcount-avg-set! stats (/ (+ (stat-qcount-avg stats) numqueries) 2)) -;;; (stat-qtime-avg-set! stats (/ (+ (stat-qtime-avg stats) duration) 2)) -;;; -;;; ;; here is where we add the stats for a given dbfile -;;; (if (not (member fname (stat-dbs stats))) -;;; (stat-dbs-set! stats (cons fname (stat-dbs stats)))) -;;; -;;; )) -;;; -;;; ;;====================================================================== -;;; ;; S E R V E R S T U F F -;;; ;;====================================================================== -;;; -;;; ;; this does NOT return! -;;; ;; -;;; (define (find-free-port-and-open acfg) -;;; (let ((port (or (area-port acfg) 3200))) -;;; (handle-exceptions -;;; exn -;;; (begin -;;; (print "INFO: cannot bind to port " (rpc:default-server-port) ", trying next port") -;;; (area-port-set! acfg (+ port 1)) -;;; (find-free-port-and-open acfg)) -;;; (rpc:default-server-port port) -;;; (area-port-set! acfg port) -;;; (tcp-read-timeout 120000) -;;; ;; ((rpc:make-server (tcp-listen port)) #t) -;;; (tcp-listen (rpc:default-server-port) -;;; )))) -;;; -;;; ;; register this node by putting a packet into the pkts dir. -;;; ;; look for other servers -;;; ;; contact other servers and compile list of servers -;;; ;; there are two types of server -;;; ;; main servers - dashboards, runners and dedicated servers - need pkt -;;; ;; passive servers - test executers, step calls, list-runs - no pkt -;;; ;; -;;; (define (register-node acfg hostip port-num) -;;; ;;(mutex-lock! (area-mutex acfg)) -;;; (let* ((server-type (area-server-type acfg)) ;; auto, main, passive (no pkt created) -;;; (best-ip (or hostip (get-my-best-address))) -;;; (mtdir (area-dbdir acfg)) -;;; (pktdir (area-pktsdir acfg))) ;; conc mtdir "/.server-pkts"))) -;;; (print "Registering node " best-ip ":" port-num) -;;; (if (not mtdir) ;; require a home for this node to put or find databases -;;; #f -;;; (begin -;;; (if (not (directory? pktdir))(create-directory pktdir)) -;;; ;; server is started, now create pkt if needed -;;; (print "Starting server in " server-type " mode with port " port-num) -;;; (if (member server-type '(auto main)) ;; TODO: if auto, count number of servers registers, if > 3 then don't put out a pkt -;;; (begin -;;; (area-pktid-set! acfg -;;; (write-alist->pkt -;;; pktdir -;;; `((hostname . ,(get-host-name)) -;;; (ipaddr . ,best-ip) -;;; (port . ,port-num) -;;; (pid . ,(current-process-id))) -;;; pktspec: *pktspec* -;;; ptype: 'server)) -;;; (area-pktfile-set! acfg (conc pktdir "/" (area-pktid acfg) ".pkt")))) -;;; (area-port-set! acfg port-num) -;;; #;(mutex-unlock! (area-mutex acfg)))))) -;;; -;;; (define *cookie-seqnum* 0) -;;; (define (make-cookie key) -;;; (set! *cookie-seqnum* (add1 *cookie-seqnum*)) -;;; ;;(print "MAKE COOKIE CALLED -- on "servkey"-"*cookie-seqnum*) -;;; (conc key "-" *cookie-seqnum*) -;;; ) -;;; -;;; ;; dispatch locally if possible -;;; ;; -;;; (define (call-deliver-response acfg ipaddr port cookie data) -;;; (if (and (equal? (area-myaddr acfg) ipaddr) -;;; (equal? (area-port acfg) port)) -;;; (deliver-response acfg cookie data) -;;; ((rpc:procedure 'response ipaddr port) cookie data))) -;;; -;;; (define (deliver-response acfg cookie data) -;;; (let ((deliver-response-start (current-milliseconds))) -;;; (thread-start! (make-thread -;;; (lambda () -;;; (let loop ((tries-left 5)) -;;; ;;(print "TOP OF DELIVER_RESPONSE LOOP; triesleft="tries-left) -;;; ;;(pp (hash-table->alist (area-cookie2mbox acfg))) -;;; (let* ((mbox (hash-table-ref/default (area-cookie2mbox acfg) cookie #f))) -;;; (cond -;;; ((eq? 0 tries-left) -;;; (print "ulex:deliver-response: I give up. Mailbox never appeared. cookie="cookie) -;;; ) -;;; (mbox -;;; ;;(print "got mbox="mbox" got data="data" send.") -;;; (mailbox-send! mbox data)) -;;; (else -;;; ;;(print "no mbox yet. look for "cookie) -;;; (thread-sleep! (/ (- 6 tries-left) 10)) -;;; (loop (sub1 tries-left)))))) -;;; ;; (debug-pp (list (conc "ulex:deliver-response took " (- (current-milliseconds) deliver-response-start) " ms, cookie=" cookie " data=") data)) -;;; (sdbg> "deliver-response" "mailbox-send" deliver-response-start #f #f cookie) -;;; ) -;;; (conc "deliver-response thread for cookie="cookie)))) -;;; #t) -;;; -;;; ;; action: -;;; ;; immediate - quick actions, no need to put in queues -;;; ;; dbwrite - put in dbwrite queue -;;; ;; dbread - put in dbread queue -;;; ;; oslong - os actions, e.g. du, that could take a long time -;;; ;; osshort - os actions that should be quick, e.g. df -;;; ;; -;;; (define (request acfg from-ipaddr from-port servkey action cookie fname params) ;; std-peer-handler -;;; ;; NOTE: Use rpc:current-peer for getting return address -;;; (let* ((std-peer-handler-start (current-milliseconds)) -;;; ;; (raw-data (alist-ref 'data dat)) -;;; (rdat (hash-table-ref/default -;;; (area-rtable acfg) action #f)) ;; this looks up the sql query or other details indexed by the action -;;; (witem (make-witem ripaddr: from-ipaddr ;; rhost: from-host -;;; rport: from-port action: action -;;; rdat: rdat cookie: cookie -;;; servkey: servkey data: params ;; TODO - rename data to params -;;; caller: (rpc:current-peer)))) -;;; (if (not (equal? servkey (area-pktid acfg))) -;;; `(#f . ,(conc "I don't know you servkey=" servkey ", pktid=" (area-pktid acfg))) ;; immediately return this -;;; (let* ((ctype (if rdat -;;; (calldat-ctype rdat) ;; is this necessary? these should be identical -;;; action))) -;;; (sdbg> "std-peer-handler" "immediate" std-peer-handler-start #f #f) -;;; (case ctype -;;; ;; (dbwrite acfg rdat (cons from-ipaddr from-port) data))) -;;; ((full-ping) `(#t "ack to full ping" ,(work-queue-add acfg fname witem) ,cookie)) -;;; ((response) `(#t "ack from requestor" ,(deliver-response acfg fname params))) -;;; ((dbwrite) `(#t "db write submitted" ,(work-queue-add acfg fname witem) ,cookie)) -;;; ((dbread) `(#t "db read submitted" ,(work-queue-add acfg fname witem) ,cookie )) -;;; ((dbrw) `(#t "db read/write submitted" ,cookie)) -;;; ((osshort) `(#t "os short submitted" ,cookie)) -;;; ((oslong) `(#t "os long submitted" ,cookie)) -;;; (else `(#f "unrecognised action" ,ctype))))))) -;;; -;;; ;; Call this to start the actual server -;;; ;; -;;; ;; start_server -;;; ;; -;;; ;; mode: ' -;;; ;; handler: proc which takes pktrecieved as argument -;;; ;; -;;; -;;; (define (start-server acfg) -;;; (let* ((conn (find-free-port-and-open acfg)) -;;; (port (area-port acfg))) -;;; (rpc:publish-procedure! -;;; 'delist-db -;;; (lambda (fname) -;;; (hash-table-delete! (area-dbs acfg) fname))) -;;; (rpc:publish-procedure! -;;; 'calling-addr -;;; (lambda () -;;; (rpc:current-peer))) -;;; (rpc:publish-procedure! -;;; 'ping -;;; (lambda ()(real-ping acfg))) -;;; (rpc:publish-procedure! -;;; 'request -;;; (lambda (from-addr from-port servkey action cookie dbname params) -;;; (request acfg from-addr from-port servkey action cookie dbname params))) -;;; (rpc:publish-procedure! -;;; 'response -;;; (lambda (cookie res-dat) -;;; (deliver-response acfg cookie res-dat))) -;;; (area-ready-set! acfg #t) -;;; (area-conn-set! acfg conn) -;;; ((rpc:make-server conn) #f)));; ((tcp-listen (rpc:default-server-port)) #t) -;;; -;;; -;;; (define (launch acfg) ;; #!optional (proc std-peer-handler)) -;;; (print "starting launch") -;;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) -;;; #;(let ((original-handler (current-exception-handler))) ;; is th -;;; (lambda (exception) -;;; (server-exit-procedure) -;;; (original-handler exception))) -;;; (on-exit (lambda () -;;; (shutdown acfg))) ;; (finalize-all-db-handles acfg))) -;;; ;; set up the rpc handler -;;; (let* ((th1 (make-thread -;;; (lambda ()(start-server acfg)) -;;; "server thread")) -;;; (th2 (make-thread -;;; (lambda () -;;; (print "th2 starting") -;;; (let loop () -;;; (work-queue-processor acfg) -;;; (print "work-queue-processor crashed!") -;;; (loop))) -;;; "work queue thread"))) -;;; (thread-start! th1) -;;; (thread-start! th2) -;;; (let loop () -;;; (thread-sleep! 0.025) -;;; (if (area-ready acfg) -;;; #t -;;; (loop))) -;;; ;; attempt to fix my address -;;; (let* ((all-addr (get-all-ips-sorted))) ;; could use (tcp-addresses conn)? -;;; (let loop ((rem-addrs all-addr)) -;;; (if (null? rem-addrs) -;;; (begin -;;; (print "ERROR: Failed to figure out the ip address of myself as a server. Giving up.") -;;; (exit 1)) ;; BUG Changeme to raising an exception -;;; -;;; (let* ((addr (car rem-addrs)) -;;; (good-addr (handle-exceptions -;;; exn -;;; #f -;;; ((rpc:procedure 'calling-addr addr (area-port acfg)))))) -;;; (if good-addr -;;; (begin -;;; (print "Got good-addr of " good-addr) -;;; (area-myaddr-set! acfg good-addr)) -;;; (loop (cdr rem-addrs))))))) -;;; (register-node acfg (area-myaddr acfg)(area-port acfg)) -;;; (print "INFO: Server started on " (area-myaddr acfg) ":" (area-port acfg)) -;;; ;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) -;;; )) -;;; -;;; (define (clear-server-pkt acfg) -;;; (let ((pktf (area-pktfile acfg))) -;;; (if pktf (delete-file* pktf)))) -;;; -;;; (define (shutdown acfg) -;;; (let (;;(conn (area-conn acfg)) -;;; (pktf (area-pktfile acfg)) -;;; (port (area-port acfg))) -;;; (if pktf (delete-file* pktf)) -;;; (send-all "imshuttingdown") -;;; ;; (rpc:close-all-connections!) ;; don't know if this is actually needed -;;; (finalize-all-db-handles acfg))) -;;; -;;; (define (send-all msg) -;;; #f) -;;; -;;; ;; given a area record look up all the packets -;;; ;; -;;; (define (get-all-server-pkts acfg) -;;; (let ((all-pkt-files (glob (conc (area-pktsdir acfg) "/*.pkt")))) -;;; (map (lambda (pkt-file) -;;; (read-pkt->alist pkt-file pktspec: *pktspec*)) -;;; all-pkt-files))) -;;; -;;; #;((Z . "9a0212302295a19610d5796fce0370fa130758e9") -;;; (port . "34827") -;;; (pid . "28748") -;;; (hostname . "zeus") -;;; (T . "server") -;;; (D . "1549427032.0")) -;;; -;;; #;(define (get-my-best-address) -;;; (let ((all-my-addresses (get-all-ips))) ;; (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))) -;;; (cond -;;; ((null? all-my-addresses) -;;; (get-host-name)) ;; no interfaces? -;;; ((eq? (length all-my-addresses) 1) -;;; (ip->string (car all-my-addresses))) ;; only one to choose from, just go with it -;;; (else -;;; (ip->string (car (filter (lambda (x) ;; take any but 127. -;;; (not (eq? (u8vector-ref x 0) 127))) -;;; all-my-addresses))))))) -;;; -;;; ;; whoami? I am my pkt -;;; ;; -;;; (define (whoami? acfg) -;;; (hash-table-ref/default (area-hosts acfg)(area-pktid acfg) #f)) -;;; -;;; ;;====================================================================== -;;; ;; "Client side" operations -;;; ;;====================================================================== -;;; -;;; (define (safe-call call-key host port . params) -;;; (handle-exceptions -;;; exn -;;; (begin -;;; (print "Call " call-key " to " host ":" port " failed") -;;; #f) -;;; (apply (rpc:procedure call-key host port) params))) -;;; -;;; ;; ;; convert to/from string / sexpr -;;; ;; -;;; ;; (define (string->sexpr str) -;;; ;; (if (string? str) -;;; ;; (with-input-from-string str read) -;;; ;; str)) -;;; ;; -;;; ;; (define (sexpr->string s) -;;; ;; (with-output-to-string (lambda ()(write s)))) -;;; -;;; ;; is the server alive? -;;; ;; -;;; (define (ping acfg host port) -;;; (let* ((myaddr (area-myaddr acfg)) -;;; (myport (area-port acfg)) -;;; (start-time (current-milliseconds)) -;;; (res (if (and (equal? myaddr host) -;;; (equal? myport port)) -;;; (real-ping acfg) -;;; ((rpc:procedure 'ping host port))))) -;;; (cons (- (current-milliseconds) start-time) -;;; res))) -;;; -;;; ;; returns ( ipaddr port alist-fname=>randnum ) -;;; (define (real-ping acfg) -;;; `(,(area-myaddr acfg) ,(area-port acfg) ,(get-host-stats acfg))) -;;; -;;; ;; is the server alive AND the queues processing? -;;; ;; -;;; #;(define (full-ping acfg servpkt) -;;; (let* ((start-time (current-milliseconds)) -;;; (res (send-message acfg servpkt '(full-ping) 'full-ping))) -;;; (cons (- (current-milliseconds) start-time) -;;; res))) ;; (equal? res "got ping")))) -;;; -;;; -;;; ;; look up all pkts and get the server id (the hash), port, host/ip -;;; ;; store this info in acfg -;;; ;; return the number of responsive servers found -;;; ;; -;;; ;; DO NOT VERIFY THAT THE SERVER IS ALIVE HERE. This is called at times where the current server is not yet alive and cannot ping itself -;;; ;; -;;; (define (update-known-servers acfg) -;;; ;; readll all pkts -;;; ;; foreach pkt; if it isn't me ping the server; if alive, add to hosts hash, else rm the pkt -;;; (let* ((start-time (current-milliseconds)) -;;; (all-pkts (delete-duplicates -;;; (append (get-all-server-pkts acfg) -;;; (hash-table-values (area-hosts acfg))))) -;;; (hostshash (area-hosts acfg)) -;;; (my-id (area-pktid acfg)) -;;; (pktsdir (area-pktsdir acfg)) ;; needed to remove pkts from non-responsive servers -;;; (numsrvs 0) -;;; (delpkt (lambda (pktsdir sid) -;;; (print "clearing out server " sid) -;;; (delete-file* (conc pktsdir "/" sid ".pkt")) -;;; (hash-table-delete! hostshash sid)))) -;;; (area-last-srvup-set! acfg (current-seconds)) -;;; (for-each -;;; (lambda (servpkt) -;;; (if (list? servpkt) -;;; ;; (pp servpkt) -;;; (let* ((shost (alist-ref 'ipaddr servpkt)) -;;; (sport (any->number (alist-ref 'port servpkt))) -;;; (res (handle-exceptions -;;; exn -;;; (begin -;;; ;; (print "INFO: bad server on " shost ":" sport) -;;; #f) -;;; (ping acfg shost sport))) -;;; (sid (alist-ref 'Z servpkt)) ;; Z code is our name for the server -;;; (url (conc shost ":" sport)) -;;; ) -;;; #;(if (or (not res) -;;; (null? res)) -;;; (begin -;;; (print "STRANGE: ping of " url " gave " res))) -;;; -;;; ;; (print "Got " res " from " shost ":" sport) -;;; (match res -;;; ((qduration . payload) -;;; ;; (print "Server pkt:" (alist-ref 'ipaddr servpkt) ":" (alist-ref 'port servpkt) -;;; ;; (if payload -;;; ;; "Success" "Fail")) -;;; (match payload -;;; ((host port stats) -;;; ;; (print "From " host ":" port " got stats: " stats) -;;; (if (and host port stats) -;;; (let ((url (conc host ":" port))) -;;; (hash-table-set! hostshash sid servpkt) -;;; ;; store based on host:port -;;; (hash-table-set! (area-hoststats acfg) sid stats)) -;;; (print "missing data from the server, not sure what that means!")) -;;; (set! numsrvs (+ numsrvs 1))) -;;; (#f -;;; (print "Removing pkt " sid " due to #f from server or failed ping") -;;; (delpkt pktsdir sid)) -;;; (else -;;; (print "Got ")(pp res)(print " from server ")(pp servpkt) " but response did not match (#f/#t . msg)"))) -;;; (else -;;; ;; here we delete the pkt - can't reach the server, remove it -;;; ;; however this logic is inadequate. we should mark the server as checked -;;; ;; and not good, if it happens a second time - then remove the pkt -;;; ;; or something similar. I.e. don't be too quick to assume the server is wedged or dead -;;; ;; could be it is simply too busy to reply -;;; (let ((bad-pings (hash-table-ref/default (area-health acfg) url 0))) -;;; (if (> bad-pings 1) ;; two bad pings - remove pkt -;;; (begin -;;; (print "INFO: " bad-pings " bad responses from " url ", deleting pkt " sid) -;;; (delpkt pktsdir sid)) -;;; (begin -;;; (print "INFO: " bad-pings " bad responses from " shost ":" sport " not deleting pkt yet") -;;; (hash-table-set! (area-health acfg) -;;; url -;;; (+ (hash-table-ref/default (area-health acfg) url 0) 1)) -;;; )) -;;; )))) -;;; ;; servpkt is not actually a pkt? -;;; (begin -;;; (print "Bad pkt " servpkt)))) -;;; all-pkts) -;;; (sdbg> "update-known-servers" "end" start-time #f #f " found " numsrvs -;;; " servers, pkts: " (map (lambda (p) -;;; (alist-ref 'Z p)) -;;; all-pkts)) -;;; numsrvs)) -;;; -;;; (defstruct srvstat -;;; (numfiles 0) ;; number of db files handled by this server - subtract 1 for the db being currently looked at -;;; (randnum #f) ;; tie breaker number assigned to by the server itself - applies only to the db under consideration -;;; (pkt #f)) ;; the server pkt -;;; -;;; ;;(define (srv->srvstat srvpkt) -;;; -;;; ;; Get the server best for given dbname and key -;;; ;; -;;; ;; NOTE: key is not currently used. The key points to the kind of query, this may be useful for directing read-only queries. -;;; ;; -;;; (define (get-best-server acfg dbname key) -;;; (let* (;; (servers (hash-table-values (area-hosts acfg))) -;;; (servers (area-hosts acfg)) -;;; (skeys (sort (hash-table-keys servers) string>=?)) ;; a stable listing -;;; (start-time (current-milliseconds)) -;;; (srvstats (make-hash-table)) ;; srvid => srvstat -;;; (url (conc (area-myaddr acfg) ":" (area-port acfg)))) -;;; ;; (print "scores for " dbname ": " (map (lambda (k)(cons k (calc-server-score acfg dbname k))) skeys)) -;;; (if (null? skeys) -;;; (if (> (update-known-servers acfg) 0) -;;; (get-best-server acfg dbname key) ;; some risk of infinite loop here, TODO add try counter -;;; (begin -;;; (print "ERROR: no server found!") ;; since this process is also a server this should never happen -;;; #f)) -;;; (begin -;;; ;; (print "in get-best-server with skeys=" skeys) -;;; (if (> (- (current-seconds) (area-last-srvup acfg)) 10) -;;; (begin -;;; (update-known-servers acfg) -;;; (sdbg> "get-best-server" "update-known-servers" start-time #f #f))) -;;; -;;; ;; for each server look at the list of dbfiles, total number of dbs being handled -;;; ;; and the rand number, save the best host -;;; ;; also do a delist-db for each server dbfile not used -;;; (let* ((best-server #f) -;;; (servers-to-delist (make-hash-table))) -;;; (for-each -;;; (lambda (srvid) -;;; (let* ((server (hash-table-ref/default servers srvid #f)) -;;; (stats (hash-table-ref/default (area-hoststats acfg) srvid '(())))) -;;; ;; (print "stats: " stats) -;;; (if server -;;; (let* ((dbweights (car stats)) -;;; (srvload (length (filter (lambda (x)(not (equal? dbname (car x)))) dbweights))) -;;; (dbrec (alist-ref dbname dbweights equal?)) ;; get the pair with fname . randscore -;;; (randnum (if dbrec -;;; dbrec ;; (cdr dbrec) -;;; 0))) -;;; (hash-table-set! srvstats srvid (make-srvstat numfiles: srvload randnum: randnum pkt: server)))))) -;;; skeys) -;;; -;;; (let* ((sorted (sort (hash-table-values srvstats) -;;; (lambda (a b) -;;; (let ((numfiles-a (srvstat-numfiles a)) -;;; (numfiles-b (srvstat-numfiles b)) -;;; (randnum-a (srvstat-randnum a)) -;;; (randnum-b (srvstat-randnum b))) -;;; (if (< numfiles-a numfiles-b) ;; Note, I don't think adding an offset works here. Goal was only move file handling to a different server if it has 2 less -;;; #t -;;; (if (and (equal? numfiles-a numfiles-b) -;;; (< randnum-a randnum-b)) -;;; #t -;;; #f)))))) -;;; (best (if (null? sorted) -;;; (begin -;;; (print "ERROR: should never be null due to self as server.") -;;; #f) -;;; (srvstat-pkt (car sorted))))) -;;; #;(print "SERVER(" url "): " dbname ": " (map (lambda (srv) -;;; (let ((p (srvstat-pkt srv))) -;;; (conc (alist-ref 'ipaddr p) ":" (alist-ref 'port p) -;;; "(" (srvstat-numfiles srv)","(srvstat-randnum srv)")"))) -;;; sorted)) -;;; best)))))) -;;; -;;; ;; send out an "I'm about to exit notice to all known servers" -;;; ;; -;;; (define (death-imminent acfg) -;;; '()) -;;; -;;; ;;====================================================================== -;;; ;; U L E X - T H E I N T E R E S T I N G S T U F F ! ! -;;; ;;====================================================================== -;;; -;;; ;; register a handler -;;; ;; NOTES: -;;; ;; dbinitsql is reserved for a list of sql statements for initializing the db -;;; ;; dbinitfn is reserved for a db init function, if exists called after dbinitsql -;;; ;; -;;; (define (register acfg key obj #!optional (ctype 'dbwrite)) -;;; (let ((ht (area-rtable acfg))) -;;; (if (hash-table-exists? ht key) -;;; (print "WARNING: redefinition of entry " key)) -;;; (hash-table-set! ht key (make-calldat obj: obj ctype: ctype)))) -;;; -;;; ;; usage: register-batch acfg '((key1 . sql1) (key2 . sql2) ... ) -;;; ;; NB// obj is often an sql query -;;; ;; -;;; (define (register-batch acfg ctype data) -;;; (let ((ht (area-rtable acfg))) -;;; (map (lambda (dat) -;;; (hash-table-set! ht (car dat)(make-calldat obj: (cdr dat) ctype: ctype))) -;;; data))) -;;; -;;; (define (initialize-area-calls-from-specfile area specfile) -;;; (let* ((callspec (with-input-from-file specfile read ))) -;;; (for-each (lambda (group) -;;; (register-batch -;;; area -;;; (car group) -;;; (cdr group))) -;;; callspec))) -;;; -;;; ;; get-rentry -;;; ;; -;;; (define (get-rentry acfg key) -;;; (hash-table-ref/default (area-rtable acfg) key #f)) -;;; -;;; (define (get-rsql acfg key) -;;; (let ((cdat (get-rentry acfg key))) -;;; (if cdat -;;; (calldat-obj cdat) -;;; #f))) -;;; -;;; -;;; -;;; ;; blocking call: -;;; ;; client server -;;; ;; ------ ------ -;;; ;; call() -;;; ;; send-message() -;;; ;; nmsg-send() -;;; ;; nmsg-receive() -;;; ;; nmsg-respond(ack,cookie) -;;; ;; ack, cookie -;;; ;; mbox-thread-wait(cookie) -;;; ;; nmsg-send(client,cookie,result) -;;; ;; nmsg-respond(ack) -;;; ;; return result -;;; ;; -;;; ;; reserved action: -;;; ;; 'immediate -;;; ;; 'dbinitsql -;;; ;; -;;; (define (call acfg dbname action params #!optional (count 0)) -;;; (let* ((call-start-time (current-milliseconds)) -;;; (srv (get-best-server acfg dbname action)) -;;; (post-get-start-time (current-milliseconds)) -;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f)) -;;; (myid (trim-pktid (area-pktid acfg))) -;;; (srvid (trim-pktid (alist-ref 'Z srv))) -;;; (cookie (make-cookie myid))) -;;; (sdbg> "call" "get-best-server" call-start-time #f call-start-time " from: " myid " to server: " srvid " for " dbname " action: " action " params: " params " rdat: " rdat) -;;; (print "INFO: call to " (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv) " from " (area-myaddr acfg) ":" (area-port acfg) " for " dbname) -;;; (if (and srv rdat) ;; need both to dispatch a request -;;; (let* ((ripaddr (alist-ref 'ipaddr srv)) -;;; (rsrvid (alist-ref 'Z srv)) -;;; (rport (any->number (alist-ref 'port srv))) -;;; (res-full (if (and (equal? ripaddr (area-myaddr acfg)) -;;; (equal? rport (area-port acfg))) -;;; (request acfg ripaddr rport (area-pktid acfg) action cookie dbname params) -;;; (safe-call 'request ripaddr rport -;;; (area-myaddr acfg) -;;; (area-port acfg) -;;; #;(area-pktid acfg) -;;; rsrvid -;;; action cookie dbname params)))) -;;; ;; (print "res-full: " res-full) -;;; (match res-full -;;; ((response-ok response-msg rem ...) -;;; (let* ((send-message-time (current-milliseconds)) -;;; ;; (match res-full -;;; ;; ((response-ok response-msg) -;;; ;; (response-ok (car res-full)) -;;; ;; (response-msg (cadr res-full) -;;; ) -;;; ;; (res (take res-full 3))) ;; ctype == action, TODO: converge on one term <<=== what was this? BUG -;;; ;; (print "ulex:call: send-message took " (- send-message-time post-get-start-time) " ms params=" params) -;;; (sdbg> "call" "send-message" post-get-start-time #f call-start-time) -;;; (cond -;;; ((not response-ok) #f) -;;; ((member response-msg '("db read submitted" "db write submitted")) -;;; (let* ((cookie-id (cadddr res-full)) -;;; (mbox (make-mailbox)) -;;; (mbox-time (current-milliseconds))) -;;; (hash-table-set! (area-cookie2mbox acfg) cookie-id mbox) -;;; (let* ((mbox-timeout-secs 20) -;;; (mbox-timeout-result 'MBOX_TIMEOUT) -;;; (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) -;;; (mbox-receive-time (current-milliseconds))) -;;; (hash-table-delete! (area-cookie2mbox acfg) cookie-id) -;;; (sdbg> "call" "mailbox-receive" mbox-time #f call-start-time " from: " myid " to server: " srvid " for " dbname) -;;; ;; (print "ulex:call mailbox-receive took " (- mbox-receive-time mbox-time) "ms params=" params) -;;; res))) -;;; (else -;;; (print "Unhandled response \""response-msg"\"") -;;; #f)) -;;; ;; depending on what action (i.e. ctype) is we will block here waiting for -;;; ;; all the data (mechanism to be determined) -;;; ;; -;;; ;; if res is a "working on it" then wait -;;; ;; wait for result -;;; ;; mailbox thread wait on -;;; -;;; ;; if res is a "can't help you" then try a different server -;;; ;; if res is a "ack" (e.g. for one-shot requests) then return res -;;; )) -;;; (else -;;; (if (< count 10) -;;; (let* ((url (conc (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv)))) -;;; (thread-sleep! 1) -;;; (print "ERROR: Bad result from " url ", dbname: " dbname ", action: " action ", params: " params ". Trying again in 1 second.") -;;; (call acfg dbname action params (+ count 1))) -;;; (begin -;;; (error (conc "ERROR: " count " tries, still have improper response res-full=" res-full))))))) -;;; (begin -;;; (if (not rdat) -;;; (print "ERROR: action " action " not registered.") -;;; (if (< count 10) -;;; (begin -;;; (thread-sleep! 1) -;;; (area-hosts-set! acfg (make-hash-table)) ;; clear out all known hosts -;;; (print "ERROR: no server found, srv=" srv ", trying again in 1 seconds") -;;; (call acfg dbname action params (+ count 1))) -;;; (begin -;;; (error (conc "ERROR: no server found after 10 tries, srv=" srv ", giving up.")) -;;; #;(error "No server available")))))))) -;;; -;;; -;;; ;;====================================================================== -;;; ;; U T I L I T I E S -;;; ;;====================================================================== -;;; -;;; ;; get a signature for identifing this process -;;; ;; -;;; (define (get-process-signature) -;;; (cons (get-host-name)(current-process-id))) -;;; -;;; ;;====================================================================== -;;; ;; S Y S T E M S T U F F -;;; ;;====================================================================== -;;; -;;; ;; get normalized cpu load by reading from /proc/loadavg and -;;; ;; /proc/cpuinfo return all three values and the number of real cpus -;;; ;; and the number of threads returns alist '((adj-cpu-load -;;; ;; . normalized-proc-load) ... etc. keys: adj-proc-load, -;;; ;; adj-core-load, 1m-load, 5m-load, 15m-load -;;; ;; -;;; (define (get-normalized-cpu-load) -;;; (let ((res (get-normalized-cpu-load-raw)) -;;; (default `((adj-proc-load . 2) ;; there is no right answer -;;; (adj-core-load . 2) -;;; (1m-load . 2) -;;; (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong -;;; (15m-load . 0) -;;; (proc . 1) -;;; (core . 1) -;;; (phys . 1) -;;; (error . #t)))) -;;; (cond -;;; ((and (list? res) -;;; (> (length res) 2)) -;;; res) -;;; ((eq? res #f) default) ;; add messages? -;;; ((eq? res #f) default) ;; this would be the #eof -;;; (else default)))) -;;; -;;; (define (get-normalized-cpu-load-raw) -;;; (let* ((actual-host (get-host-name))) ;; #f is localhost -;;; (let ((data (append -;;; (with-input-from-file "/proc/loadavg" read-lines) -;;; (with-input-from-file "/proc/cpuinfo" read-lines) -;;; (list "end"))) -;;; (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) -;;; (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) -;;; (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) -;;; (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) -;;; (max-num (lambda (p n)(max (string->number p) n)))) -;;; ;; (print "data=" data) -;;; (if (null? data) ;; something went wrong -;;; #f -;;; (let loop ((hed (car data)) -;;; (tal (cdr data)) -;;; (loads #f) -;;; (proc-num 0) ;; processor includes threads -;;; (phys-num 0) ;; physical chip on motherboard -;;; (core-num 0)) ;; core -;;; ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) -;;; (if (null? tal) ;; have all our data, calculate normalized load and return result -;;; (let* ((act-proc (+ proc-num 1)) -;;; (act-phys (+ phys-num 1)) -;;; (act-core (+ core-num 1)) -;;; (adj-proc-load (/ (car loads) act-proc)) -;;; (adj-core-load (/ (car loads) act-core)) -;;; (result -;;; (append (list (cons 'adj-proc-load adj-proc-load) -;;; (cons 'adj-core-load adj-core-load)) -;;; (list (cons '1m-load (car loads)) -;;; (cons '5m-load (cadr loads)) -;;; (cons '15m-load (caddr loads))) -;;; (list (cons 'proc act-proc) -;;; (cons 'core act-core) -;;; (cons 'phys act-phys))))) -;;; result) -;;; (regex-case -;;; hed -;;; (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) -;;; (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) -;;; (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) -;;; (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) -;;; (else -;;; (begin -;;; ;; (print "NO MATCH: " hed) -;;; (loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))) -;;; -;;; (define (get-host-stats acfg) -;;; (let ((stats-hash (area-stats acfg))) -;;; ;; use this opportunity to remove references to dbfiles which have not been accessed in a while -;;; (for-each -;;; (lambda (dbname) -;;; (let* ((stats (hash-table-ref stats-hash dbname)) -;;; (last-access (stat-when stats))) -;;; (if (and (> last-access 0) ;; if zero then there has been no access -;;; (> (- (current-seconds) last-access) 10)) ;; not used in ten seconds -;;; (begin -;;; (print "Removing " dbname " from stats list") -;;; (hash-table-delete! stats-hash dbname) ;; remove from stats hash -;;; (stat-dbs-set! stats (hash-table-keys stats)))))) -;;; (hash-table-keys stats-hash)) -;;; -;;; `(,(hash-table->alist (area-dbs acfg)) ;; dbname => randnum -;;; ,(map (lambda (dbname) ;; dbname is the db name -;;; (cons dbname (stat-when (hash-table-ref stats-hash dbname)))) -;;; (hash-table-keys stats-hash)) -;;; (cpuload . ,(get-normalized-cpu-load))))) -;;; #;(stats . ,(map (lambda (k) ;; create an alist from the stats data -;;; (cons k (stat->alist (hash-table-ref (area-stats acfg) k)))) -;;; (hash-table-keys (area-stats acfg)))) -;;; -;;; #;(trace -;;; ;; assv -;;; ;; cdr -;;; ;; caar -;;; ;; ;; cdr -;;; ;; call -;;; ;; finalize-all-db-handles -;;; ;; get-all-server-pkts -;;; ;; get-normalized-cpu-load -;;; ;; get-normalized-cpu-load-raw -;;; ;; launch -;;; ;; nmsg-send -;;; ;; process-db-queries -;;; ;; receive-message -;;; ;; std-peer-handler -;;; ;; update-known-servers -;;; ;; work-queue-processor -;;; ) -;;; -;;; ;;====================================================================== -;;; ;; netutil -;;; ;; move this back to ulex-netutil.scm someday? -;;; ;;====================================================================== -;;; -;;; ;; #include -;;; ;; #include -;;; ;; #include -;;; ;; #include -;;; -;;; (foreign-declare "#include \"sys/types.h\"") -;;; (foreign-declare "#include \"sys/socket.h\"") -;;; (foreign-declare "#include \"ifaddrs.h\"") -;;; (foreign-declare "#include \"arpa/inet.h\"") -;;; -;;; ;; get IP addresses from ALL interfaces -;;; (define get-all-ips -;;; (foreign-safe-lambda* scheme-object () -;;; " -;;; -;;; // from https://stackoverflow.com/questions/17909401/linux-c-get-default-interfaces-ip-address : -;;; -;;; -;;; C_word lst = C_SCHEME_END_OF_LIST, len, str, *a; -;;; // struct ifaddrs *ifa, *i; -;;; // struct sockaddr *sa; -;;; -;;; struct ifaddrs * ifAddrStruct = NULL; -;;; struct ifaddrs * ifa = NULL; -;;; void * tmpAddrPtr = NULL; -;;; -;;; if ( getifaddrs(&ifAddrStruct) != 0) -;;; C_return(C_SCHEME_FALSE); -;;; -;;; // for (i = ifa; i != NULL; i = i->ifa_next) { -;;; for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) { -;;; if (ifa->ifa_addr->sa_family==AF_INET) { // Check it is -;;; // a valid IPv4 address -;;; tmpAddrPtr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; -;;; char addressBuffer[INET_ADDRSTRLEN]; -;;; inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN); -;;; // printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); -;;; len = strlen(addressBuffer); -;;; a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); -;;; str = C_string(&a, len, addressBuffer); -;;; lst = C_a_pair(&a, str, lst); -;;; } -;;; -;;; // else if (ifa->ifa_addr->sa_family==AF_INET6) { // Check it is -;;; // // a valid IPv6 address -;;; // tmpAddrPtr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; -;;; // char addressBuffer[INET6_ADDRSTRLEN]; -;;; // inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN); -;;; //// printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); -;;; // len = strlen(addressBuffer); -;;; // a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); -;;; // str = C_string(&a, len, addressBuffer); -;;; // lst = C_a_pair(&a, str, lst); -;;; // } -;;; -;;; // else { -;;; // printf(\" not an IPv4 address\\n\"); -;;; // } -;;; -;;; } -;;; -;;; freeifaddrs(ifa); -;;; C_return(lst); -;;; -;;; ")) -;;; -;;; ;; Change this to bias for addresses with a reasonable broadcast value? -;;; ;; -;;; (define (ip-pref-less? a b) -;;; (let* ((rate (lambda (ipstr) -;;; (regex-case ipstr -;;; ( "^127\\." _ 0 ) -;;; ( "^(10\\.0|192\\.168\\.)\\..*" _ 1 ) -;;; ( else 2 ) )))) -;;; (< (rate a) (rate b)))) -;;; -;;; -;;; (define (get-my-best-address) -;;; (let ((all-my-addresses (get-all-ips)) -;;; ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) -;;; ) -;;; (cond -;;; ((null? all-my-addresses) -;;; (get-host-name)) ;; no interfaces? -;;; ((eq? (length all-my-addresses) 1) -;;; (car all-my-addresses)) ;; only one to choose from, just go with it -;;; -;;; (else -;;; (car (sort all-my-addresses ip-pref-less?))) -;;; ;; (else -;;; ;; (ip->string (car (filter (lambda (x) ;; take any but 127. -;;; ;; (not (eq? (u8vector-ref x 0) 127))) -;;; ;; all-my-addresses)))) -;;; -;;; ))) -;;; -;;; (define (get-all-ips-sorted) -;;; (sort (get-all-ips) ip-pref-less?)) -;;; -;;; - ADDED ulex/ulex.dot Index: ulex/ulex.dot ================================================================== --- /dev/null +++ ulex/ulex.dot @@ -0,0 +1,136 @@ +// Copyright 2006-2017, Matthew Welland. +// +// This file is part of Megatest. +// +// Megatest is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Megatest is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Megatest. If not, see . + +digraph G { + + // graph[center=true, margin=0.2, nodesep=0.1, ranksep=0.3] + + layout=neato; + // layout=fdp; + // overlap=scalexy; //false, compress, ... + overlap=scalexy; + // sep="+1"; // 0.1, +1 + sep="-0.1"; + + user_program [label="user program"]; + + subgraph cluster_1 { + node [style=filled,shape=oval]; + label = "caller"; + color=brown; + + send_receive [label="(send-receive uconn\n host-port qrykey cmd data)"]; + send [label="(send uconn host-port\n qrykey cmd data)"]; + ulex_cmd_loopcaller [label="(ulex-cmd-loop uconn)"]; + ulex_handlercaller [label="(ulex-handler uconn rdat)"]; + mailbox [label="mailbox\n\nrdat\n...",shape=box]; + + send_receive -> send; + ulex_cmd_loopcaller -> ulex_handlercaller; + ulex_handlercaller -> mailbox; + mailbox -> send_receive; + } + + subgraph cluster_2 { + node [shape=oval]; + label = "listener"; + color=green; + + ulex_cmd_loop [label="(ulex-cmd-loop uconn)"]; + ulex_handler [label="(ulex-handler \nuconn rdat)"]; + add_to_work_queue [label="(add-to-work-queue\n uconn rdat)"]; + queue [label="queue\n\nrdat\n...",shape=box]; + process_work_queue [label="(process-work-queue uconn)"]; + do_work [label="(do-work uconn rdat)\nrdat: '(rem-host-port qrykey cmd params)"]; + user_proc [label="(proc rem-host-port\n qrykey cmd params)\n;; proc supplied by user"]; + sendlis [label="(send uconn host-port\n qrykey 'response result)"]; + + ulex_cmd_loop -> ulex_handler [label="rdat"]; + ulex_handler -> add_to_work_queue [label="rdat"]; + + add_to_work_queue -> queue [label="rdat"]; + + subgraph cluster_3 { + label = "remote work"; + color=blue; + + queue -> process_work_queue [label="rdat"]; + process_work_queue -> do_work [label="rdat"]; + do_work -> user_proc; // [label="rdat: '(rem-host-port\n qrykey cmd params)"]; + } + } + + user_proc -> sendlis; + user_program -> send_receive; + send_receive -> user_program; + + send -> ulex_cmd_loop [label="rdat: '(host-port\n qrykey cmd data)"]; + sendlis -> ulex_cmd_loopcaller [label="rdat: '(host-port qrykey\n 'response result)"]; + ulex_handler -> send [label="'ack"]; + ulex_handlercaller -> sendlis [label="'ack"]; + +} + + +// check_available_queue -> remove_entries_over_10s_old; +// remove_entries_over_10s_old -> set_available [label="num_avail < 3"]; +// remove_entries_over_10s_old -> exit [label="num_avail > 2"]; +// +// set_available -> delay_2s; +// delay_2s -> check_place_in_queue; +// +// check_place_in_queue -> "http:transport-launch" [label="at head"]; +// check_place_in_queue -> exit [label="not at head"]; +// +// "client:login" -> "server:shutdown" [label="login failed"]; +// "server:shutdown" -> exit; +// +// subgraph cluster_2 { +// "http:transport-launch" -> "http:transport-run"; +// "http:transport-launch" -> "http:transport-keep-running"; +// +// "http:transport-keep-running" -> "tests running?"; +// "tests running?" -> "client:login" [label=yes]; +// "tests running?" -> "server:shutdown" [label=no]; +// "client:login" -> delay_5s [label="login ok"]; +// delay_5s -> "http:transport-keep-running"; +// } +// + // start_server -> "server_running?"; + // "server_running?" -> set_available [label="no"]; + // "server_running?" -> delay_2s [label="yes"]; + // delay_2s -> "still_running?"; + // "still_running?" -> ping_server [label=yes]; + // "still_running?" -> set_available [label=no]; + // ping_server -> exit [label=alive]; + // ping_server -> remove_server_record [label=dead]; + // remove_server_record -> set_available; + // set_available -> avail_delay [label="delay 3s"]; + // avail_delay -> "first_in_queue?"; + // + // "first_in_queue?" -> set_running [label=yes]; + // set_running -> get_next_port -> handle_requests; + // "first_in_queue?" -> "dead_entry_in_queue?" [label=no]; + // "dead_entry_in_queue?" -> "server_running?" [label=no]; + // "dead_entry_in_queue?" -> "remove_dead_entries" [label=yes]; + // remove_dead_entries -> "server_running?"; + // + // handle_requests -> start_shutdown [label="no traffic\nno running tests"]; + // handle_requests -> shutdown_request; + // start_shutdown -> shutdown_delay; + // shutdown_request -> shutdown_delay; + // shutdown_delay -> exit; ADDED ulex/ulex.pdf Index: ulex/ulex.pdf ================================================================== --- /dev/null +++ ulex/ulex.pdf cannot compute difference between binary files ADDED ulex/ulex.png Index: ulex/ulex.png ================================================================== --- /dev/null +++ ulex/ulex.png cannot compute difference between binary files Index: ulex/ulex.scm ================================================================== --- ulex/ulex.scm +++ ulex/ulex.scm @@ -1,8 +1,8 @@ ;; ulex: Distributed sqlite3 db ;;; -;; Copyright (C) 2018 Matt Welland +;; Copyright (C) 2018-2021 Matt Welland ;; Redistribution and use in source and binary forms, with or without ;; modification, is permitted. ;; ;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS ;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED @@ -24,27 +24,61 @@ ;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. ;; ;;====================================================================== (module ulex - * + * + #;( + + ;; NOTE: looking for the handler proc - find the run-listener :) + + run-listener ;; (run-listener handler-proc [port]) => uconn + + ;; NOTE: handler-proc params; + ;; (handler-proc rem-host-port qrykey cmd params) + + send-receive ;; (send-receive uconn host-port cmd data) + + ;; NOTE: cmd can be any plain text symbol except for these; + ;; 'ping 'ack 'goodbye 'response + + set-work-handler ;; (set-work-handler proc) + + wait-and-close ;; (wait-and-close uconn) + + ulex-listener? + + ;; needed to get the interface:port that was automatically found + udat-port + udat-host-port + + ;; for testing only + ;; pp-uconn + + ;; parameters + work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct + return-method ;; parameter; 'mailbox, 'polling, 'direct + ) (import scheme chicken.base chicken.file + chicken.io chicken.time chicken.condition chicken.string chicken.sort chicken.pretty-print address-info mailbox matchable - queues + ;; queues regex regex-case + simple-exceptions + s11n srfi-1 srfi-18 srfi-4 srfi-69 system-information @@ -61,36 +95,63 @@ (host-port #f) (socket #f) ;; the peers (peers (make-hash-table)) ;; host:port->peer ;; work handling - (work-queue (make-queue)) - (work-proc #f) ;; set by user - (cnum 0) ;; cookie number - (mboxes (make-hash-table)) - (avail-cmboxes '()) ;; list of ( . ) for re-use - ) - -;; struct for keeping track of others we are talking to -;; -(defstruct pdat - (host-port #f) - (conns '()) ;; list of pcon structs, pop one off when calling the peer - ) - -;; struct for peer connections, keep track of expiration etc. -;; -(defstruct pcon - (inp #f) - (oup #f) - (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59) - (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes - ) + (work-queue (make-mailbox)) + (work-proc #f) ;; set by user + (cnum 0) ;; cookie number + (mboxes (make-hash-table)) ;; for the replies + (avail-cmboxes '()) ;; list of ( . ) for re-use + ;; threads + (numthreads 10) + (cmd-thread #f) + (work-queue-thread #f) + (num-threads-running 0) + ) + +;; Parameters + +;; work-method: +(define work-method (make-parameter 'mailbox)) +;; mailbox - all rdat goes through mailbox +;; threads - all rdat immediately executed in new thread +;; direct - no queuing +;; + +;; return-method, return the result to waiting send-receive: +(define return-method (make-parameter 'mailbox)) +;; mailbox - create a mailbox and use it for passing returning results to send-receive +;; polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result +;; direct - no queuing, result is passed back in single tcp connection +;; + +;; ;; struct for keeping track of others we are talking to +;; ;; +;; (defstruct pdat +;; (host-port #f) +;; (conns '()) ;; list of pcon structs, pop one off when calling the peer +;; ) +;; +;; ;; struct for peer connections, keep track of expiration etc. +;; ;; +;; (defstruct pcon +;; (inp #f) +;; (oup #f) +;; (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59) +;; (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes +;; ) ;;====================================================================== ;; listener ;;====================================================================== + +;; is uconn a ulex connector (listener) +;; +(define (ulex-listener? uconn) + (udat? uconn)) + ;; create a tcp listener and return a populated udat struct with ;; my port, address, hostname, pid etc. ;; return #f if fail to find a port to allocate. ;; ;; if udata-in is #f create the record @@ -111,14 +172,42 @@ (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) (udat-port-set! uconn port) (udat-host-port-set! uconn (conc addr":"port)) (udat-socket-set! uconn tlsn) uconn)) + +;; run-listener does all the work of starting a listener in a thread +;; it then returns control +;; +(define (run-listener handler-proc #!optional (port-suggestion 4242)) + (let* ((uconn (make-udat))) + (udat-work-proc-set! uconn handler-proc) + (if (setup-listener uconn port-suggestion) + (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop")) + (th2 (make-thread (lambda () + (case (work-method) + ((mailbox limited) + (process-work-queue uconn)))) + "Ulex work queue processor"))) + ;; (tcp-buffer-size 2048) + (thread-start! th1) + (thread-start! th2) + (udat-cmd-thread-set! uconn th1) + (udat-work-queue-thread-set! uconn th2) + (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".") + uconn) + (assert #f "ERROR: run-listener called without proper setup.")))) + +(define (wait-and-close uconn) + (thread-join! (udat-cmd-thread uconn)) + (tcp-close (udat-socket uconn))) ;;====================================================================== ;; peers and connections ;;====================================================================== + +(define *send-mutex* (make-mutex)) ;; send structured data to recipient ;; ;; NOTE: qrykey is what was called the "cookie" previously ;; @@ -131,155 +220,256 @@ ;; (define (send udata host-port qrykey cmd params) (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this (isme #f #;(equal? host-port my-host-port)) ;; calling myself? ;; dat is a self-contained work block that can be sent or handled locally - (dat (list my-host-port qrykey cmd params)) - ) - (if isme - (ulex-handler udata dat) ;; no transmission needed - (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? - exn - #f + (dat (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds))))) + (cond + (isme (ulex-handler udata dat)) ;; no transmission needed + (else + (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? + exn + (message exn) + (begin + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP (let-values (((inp oup)(tcp-connect host-port))) (let ((res (if (and inp oup) (begin - (write dat oup) - (read inp)) ;; yes, we always want an ack + (serialize dat oup) + (close-output-port oup) + (deserialize inp) + ) (begin (print "ERROR: send called but no receiver has been setup. Please call setup first!") #f)))) (close-input-port inp) - (close-output-port oup) - res)))))) ;; res will always be 'ack + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + res)))))))) ;; res will always be 'ack unless return-method is direct + +(define (send-via-polling uconn host-port cmd data) + (let* ((qrykey (make-cookie uconn)) + (sres (send uconn host-port qrykey cmd data))) + (case sres + ((ack) + (let loop ((start-time (current-milliseconds))) + (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout + (begin + (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data) + #f) + (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash + (if result ;; result is '(status . result-data) or #f for nothing yet + (begin + (hash-table-delete! (udat-mboxes uconn) qrykey) + (cdr result)) + (begin + (thread-sleep! 0.01) + (loop start-time))))))) + (else + (print "ULEX ERROR: Communication failed? sres="sres) + #f)))) + +(define (send-via-mailbox uconn host-port cmd data) + (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? + (qrykey (car cmbox)) + (mbox (cdr cmbox)) + (mbox-time (current-milliseconds)) + (sres (send uconn host-port qrykey cmd data))) ;; short res + (if (eq? sres 'ack) + (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread))) + #f + 120)) ;; timeout) + (mbox-timeout-result 'MBOX_TIMEOUT) + (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) + (mbox-receive-time (current-milliseconds))) + ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? + (hash-table-delete! (udat-mboxes uconn) qrykey) + (if (eq? res 'MBOX_TIMEOUT) + (begin + (print "WARNING: mbox timed out for query "cmd", with data "data + ", waiting for response from "host-port".") + ;; here it might make sense to clean up connection records and force clean start? + ;; NO. The progam using ulex needs to do the reset. Right thing here is exception + + #f) ;; convert to raising exception? + res)) + (begin + (print "ERROR: Communication failed? Got "sres) + #f)))) + ;; send a request to the given host-port and register a mailbox in udata ;; wait for the mailbox data and return it ;; (define (send-receive uconn host-port cmd data) - (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? - (qrykey (car cmbox)) - (mbox (cdr cmbox)) - (mbox-time (current-milliseconds))) - (if (eq? (send uconn host-port qrykey cmd data) 'ack) - (let* ((mbox-timeout-secs 120) ;; timeout) - (mbox-timeout-result 'MBOX_TIMEOUT) - (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) - (mbox-receive-time (current-milliseconds))) - (if (eq? res 'MBOX_TIMEOUT) - #f ;; convert to raising exception? - res)) - (begin - (print "ERROR: Communication failed?") - #f)))) ;; #f means failed to communicate + (let* ((start-time (current-milliseconds)) + (result (cond + ((member cmd '(ping goodbye)) ;; these are immediate + (send uconn host-port 'ping cmd data)) + ((eq? (work-method) 'direct) + ;; the result from send will be the actual result, not an 'ack + (send uconn host-port 'direct cmd data)) + (else + (case (return-method) + ((polling) + (send-via-polling uconn host-port cmd data)) + ((mailbox) + (send-via-mailbox uconn host-port cmd data)) + (else + (print "ULEX ERROR: unrecognised return-method "(return-method)".") + #f))))) + (duration (- (current-milliseconds) start-time))) + ;; this is ONLY for development and debugging. It will be removed once Ulex is stable. + (if (< 5000 duration) + (print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000))) + " seconds; "cmd", host-port="host-port", data="data)) + result)) + ;;====================================================================== ;; responder side ;;====================================================================== -;; take a request, rdata, and if not immediate put it in the work queue +;; take a request, rdat, and if not immediate put it in the work queue ;; ;; Reserved cmds; ack ping goodbye response ;; -(define (ulex-handler uconn rdata) - (match rdata ;; (string-split controldat) - ((rem-host-port qrykey cmd params) +(define (ulex-handler uconn rdat) + (assert (list? rdat) "FATAL: ulex-handler give rdat as not list") + (match rdat ;; (string-split controldat) + ((rem-host-port qrykey cmd params);; timedata) ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params) - (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) - (case cmd - ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack) - ((ping) - ;; (print "Got Ping!") - (add-to-work-queue uconn rdata) - 'ack) - ((goodbye) - ;; just clear out references to the caller - (add-to-work-queue uconn rdata) - 'ack) - ((response) ;; this is a result from remote processing, send it as mail ... - (if mbox - (begin - (mailbox-send! mbox params) ;; params here is our result - 'ack) - (begin - (print "ERROR: received result but no associated mbox for cookie "qrykey) - #f))) - (else - ;; (print "Got generic request: "cmd) - (add-to-work-queue uconn rdata) - 'ack)))) - (else - (print "BAD DATA? controldat=" rdata) - 'ack) ;; send ack anyway? - )) + (case cmd + ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack) + ((ping) + ;; (print "Got Ping!") + ;; (add-to-work-queue uconn rdat) + 'ack) + ((goodbye) + ;; just clear out references to the caller. NOT COMPLETE + (add-to-work-queue uconn rdat) + 'ack) + ((response) ;; this is a result from remote processing, send it as mail ... + (case (return-method) + ((polling) + (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params)) + 'ack) + ((mailbox) + (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) + (if mbox + (begin + (mailbox-send! mbox params) ;; params here is our result + 'ack) + (begin + (print "ERROR: received result but no associated mbox for cookie "qrykey) + 'no-mbox-found)))) + (else (print "ULEX ERROR: unrecognised return-method "(return-method)) + 'bad-return-method))) + (else ;; generic request - hand it to the work queue + (add-to-work-queue uconn rdat) + 'ack))) + (else + (print "ULEX ERROR: bad rdat "rdat) + 'bad-rdat))) ;; given an already set up uconn start the cmd-loop ;; (define (ulex-cmd-loop uconn) - (let* ((serv-listener (udat-socket uconn))) - (let loop ((state 'start)) - (let-values (((inp oup)(tcp-accept serv-listener))) - (let* ((rdat (read inp)) - (resp (ulex-handler uconn rdat))) - (if resp (write resp oup)) - (close-input-port inp) - (close-output-port oup)) - (loop state))))) + (let* ((serv-listener (udat-socket uconn)) + (listener (lambda () + (let loop ((state 'start)) + (let-values (((inp oup)(tcp-accept serv-listener))) + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP + (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params) + (resp (ulex-handler uconn rdat))) + (serialize resp oup) + (close-input-port inp) + (close-output-port oup) + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + ) + (loop state)))))) + ;; start N of them + (let loop ((thnum 0) + (threads '())) + (if (< thnum 100) + (let* ((th (make-thread listener (conc "listener" thnum)))) + (thread-start! th) + (loop (+ thnum 1) + (cons th threads))) + (map thread-join! threads))))) ;; add a proc to the cmd list, these are done symetrically (i.e. in all instances) ;; so that the proc can be dereferenced remotely ;; (define (set-work-handler uconn proc) (udat-work-proc-set! uconn proc)) -;; run-listener does all the work of starting a listener in a thread -;; it then returns control -;; -(define (run-listener handler-proc) - (let* ((uconn (make-udat))) - (udat-work-proc-set! uconn handler-proc) - (if (setup-listener uconn) - (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop")) - (th2 (make-thread (lambda ()(process-work-queue uconn)) "Ulex work queue processor"))) - (thread-start! th1) - (thread-start! th2) - (print "cmd loop and process workers started") - uconn) - (begin - (print "ERROR: run-listener called without proper setup.") - (exit))))) - ;;====================================================================== ;; work queues - this is all happening on the listener side ;;====================================================================== -;; rdata is (rem-host-port qrykey cmd params) +;; rdat is (rem-host-port qrykey cmd params) -(define (add-to-work-queue uconn rdata) - (queue-add! (udat-work-queue uconn) rdata)) - -(define (do-work uconn rdata) +(define (add-to-work-queue uconn rdat) + #;(queue-add! (udat-work-queue uconn) rdat) + (case (work-method) + ((threads) + (thread-start! (make-thread (lambda () + (do-work uconn rdat)) + "worker thread"))) + ((mailbox) + (mailbox-send! (udat-work-queue uconn) rdat)) + ((direct) + (do-work uconn rdat)) + (else + (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.") + (mailbox-send! (udat-work-queue uconn) rdat)))) + +;; move the logic to return the result somewhere else? +;; +(define (do-work uconn rdat) (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change ;; put this following into a do-work procedure - (match rdata + (match rdat ((rem-host-port qrykey cmd params) - (let* ((result (proc rem-host-port qrykey cmd params))) - ;; send 'response as cmd and result as params - (send uconn rem-host-port qrykey 'response result))) ;; could check for ack + (let* ((start-time (current-milliseconds)) + (result (proc rem-host-port qrykey cmd params)) + (end-time (current-milliseconds)) + (run-time (- end-time start-time))) + (case (work-method) + ((direct) result) + (else + (print "ULEX: work "cmd", "params" done in "run-time" ms") + ;; send 'response as cmd and result as params + (send uconn rem-host-port qrykey 'response result) ;; could check for ack + (print "ULEX: response sent back to "rem-host-port" in "(- (current-milliseconds) end-time)))))) + (MBOX_TIMEOUT 'do-work-timeout) (else - (print "ERROR: rdata "rdata", did not match rem-host-port qrykey cmd params"))))) - - + (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params"))))) + +;; NEW APPROACH: +;; (define (process-work-queue uconn) (let ((wqueue (udat-work-queue uconn)) - (proc (udat-work-proc uconn))) - (let loop () - (if (queue-empty? wqueue) - (thread-sleep! 0.1) - (let ((rdata (queue-remove! wqueue))) - (do-work uconn rdata))) - (loop)))) + (proc (udat-work-proc uconn)) + (numthr (udat-numthreads uconn))) + (let loop ((thnum 1) + (threads '())) + (let ((thlst (cons (make-thread (lambda () + (let work-loop () + (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT))) + (do-work uconn rdat)) + (work-loop))) + (conc "work thread " thnum)) + threads))) + (if (< thnum numthr) + (loop (+ thnum 1) + thlst) + (begin + (print "ULEX: Starting "(length thlst)" worker threads.") + (map thread-start! thlst) + (print "ULEX: Threads started. Joining all.") + (map thread-join! thlst))))))) ;; below was to enable re-use of connections. This seems non-trivial so for ;; now lets open on each call ;; ;; ;; given host-port get or create peer struct @@ -374,50 +564,6 @@ (map address-info-host (filter (lambda (x) (equal? (address-info-type x) "tcp")) (address-infos (get-host-name))))) -;; (map ip->string (vector->list -;; (hostinfo-addresses -;; (host-information (current-hostname)))))) - - -) - -(import ulex trace big-chicken srfi-18 test matchable system-information) -(trace-call-sites #t) -(trace - ;; ulex-handler - ;; send - ;; add-to-work-queue - ) - -(define (handler-proc rem-host-port qrykey cmd params) - (print "handler-proc "rem-host-port" "qrykey" "cmd" "params) - (case cmd - ((ping) 'pong) - ((calc) (eval (with-input-from-string params read))) - ((print) - (print "params="params) - params) - ((reflect) `(,rem-host-port ,qrykey ,cmd ,params)) - (else `(data ,data)))) - -(define uconn (run-listener handler-proc)) - -(pp-uconn uconn) - -;; super basic loop back test -(define res #f) -(define targhost (conc (get-host-name)":"4242)) -(define th1 (make-thread (lambda () - (test #f 10 (send-receive uconn targhost 'calc "(+ 5 5)")) - (set! res (send-receive uconn targhost 'ping '())) - (test #f 'pong (send-receive uconn targhost 'ping '())) - ))) - -(thread-start! th1) -(thread-join! th1) - -(print "All done") -(print "Received "res) - +)