Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -15,31 +15,58 @@ # You should have received a copy of the GNU General Public License # along with Megatest. If not, see . # make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' # rm .o ; make install CSCOPTS='-profile' ; ... ; chicken-profile | less + +all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut + +recent-commits.csv : .fslckout + fossil timeline -n 350 -t ci -F "%h,%a,%b,%t,\"%c\"" > recent-commits.csv + + SHELL=/bin/bash PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm \ server.scm configf.scm db.scm keys.scm margs.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ - http-transport.scm tdb.scm client.scm mt.scm \ - ezsteps.scm lock-queue.scm rmt.scm api.scm \ - subrun.scm portlogger.scm archive.scm env.scm \ + tdb.scm mt.scm \ + ezsteps.scm rmt.scm api.scm \ + subrun.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm +MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \ + tcp-transportmod.scm rmtmod.scm portlogger.scm -all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt +transport-mode.scm : transport-mode.scm.template + @if [[ -e transport-mode.scm ]];then \ + echo "WARNING: transport-mode.scm.template is newer than transport-mode.scm"; else \ + cp transport-mode.scm.template transport-mode.scm; fi + +dashboard-transport-mode.scm : dashboard-transport-mode.scm.template + @if [[ -e dashboard-transport-mode.scm ]];then \ + echo "WARNING: dashboard-transport-mode.scm.template is newer than dashboard-transport-mode.scm"; else \ + cp dashboard-transport-mode.scm.template dashboard-transport-mode.scm; fi + +megatest.scm : transport-mode.scm +dashboard.scm : dashboard-transport-mode.scm # dbmod.import.o is just a hack here -mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o -db.o : dbmod.import.o +mofiles/portlogger.o : mofiles/dbmod.o + +mofiles/dbfile.o : \ + mofiles/debugprint.o mofiles/commonmod.o + +configf.o : commonmod.import.o +mofiles/dbfile.o : mofiles/debugprint.o +mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o mofiles/commonmod.o mofiles/debugprint.o +db.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o : mofiles/mtargs.o +mofiles/tcp-transportmod.o : mofiles/portlogger.o # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ # rmtmod.scm apimod.scm @@ -63,13 +90,14 @@ # @[ -e mofiles ] || mkdir -p mofiles # csc $(CSCOPTS) -I mofiles -I $* -J -c $< -o $*.o # cp $*.o mofiles/$*.o # @touch $*.import.scm # ensure it is touched after the .o is made -mofiles/%.o : %.scm - mkdir -p mofiles +%.import.scm mofiles/%.o : %.scm + @mkdir -p mofiles csc $(CSCOPTS) -J -c $< -o mofiles/$*.o + @if [[ -e $*.import.scm ]];then touch $*.import.scm;fi # ensure it is touched after the .o is made ADTLSCR=mt_laststep mt_runstep mt_ezstep HELPERS=$(addprefix $(PREFIX)/bin/,$(ADTLSCR)) DEPLOYHELPERS=$(addprefix deploytarg/,$(ADTLSCR)) MTESTHASH=$(shell fossil info|grep checkout:| awk '{print $$2}') @@ -96,49 +124,45 @@ @echo $(MTESTHASH) dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o dboard -mtut: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm - csc $(CSCOPTS) $(OFILES) $(MOFILES) mtut.scm -o mtut +mtut: $(OFILES) $(MOFILES) $(MOIMPFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm + csc $(CSCOPTS) $(OFILES) $(MOFILES) $(MOIMPFILES) mtut.scm -o mtut # include makefile.inc -TCMTOBJS = \ - api.o \ - archive.o \ - cgisetup/models/pgdb.o \ - client.o \ - common.o \ - configf.o \ - db.o \ - env.o \ - http-transport.o \ - items.o \ - keys.o \ - launch.o \ - lock-queue.o \ - margs.o \ - mt.o \ - ods.o \ - portlogger.o \ - process.o \ - rmt.o \ - runconfig.o \ - runs.o \ - server.o \ - tasks.o \ - tdb.o \ - tests.o \ - subrun.o \ - ezsteps.o - -# mofiles/rmtmod.o \ -# mofiles/commonmod.o \ - -tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES) - csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt +# TCMTOBJS = \ +# api.o \ +# archive.o \ +# cgisetup/models/pgdb.o \ +# common.o \ +# configf.o \ +# db.o \ +# env.o \ +# items.o \ +# keys.o \ +# launch.o \ +# margs.o \ +# mt.o \ +# ods.o \ +# process.o \ +# rmt.o \ +# runconfig.o \ +# runs.o \ +# server.o \ +# tasks.o \ +# tdb.o \ +# tests.o \ +# subrun.o \ +# ezsteps.o +# +# # mofiles/rmtmod.o \ +# # mofiles/commonmod.o \ +# +# tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES) +# csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt # install documentation to $(PREFIX)/docs # DOES NOT REBUILD DOCS # $(PREFIX)/share/docs/megatest_manual.html : docs/manual/megatest_manual.html @@ -156,11 +180,11 @@ $(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql # Special dependencies for the includes $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm -mofiles/commonmod.o : megatest-fossil-hash.scm +mofiles/commonmod.o : mofiles/debugprint.o megatest-fossil-hash.scm common.o : mofiles/commonmod.o # mofiles/dbmod.o : mofiles/configfmod.o # commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm @@ -176,16 +200,17 @@ tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm -mofiles-made : $(MOFILES) - make $(MOIMPFILES) +# mofiles-made : $(MOFILES) +# make $(MOIMPFILES) +# touch mofiles-made -megatest.o : mofiles-made megatest-fossil-hash.scm megatest-version.scm $(MOFILES) $(MOIMPFILES) +megatest.o : megatest-fossil-hash.scm megatest-version.scm $(MOFILES) $(MOIMPFILES) -rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm +rmt.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm common_records.scm : altdb.scm mofiles/dbfile.o : mofiles/commonmod.o @@ -260,16 +285,16 @@ utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec chmod a+x $(PREFIX)/bin/mtexec # tcmt -$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt - $(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt - -$(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper - utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt - chmod a+x $(PREFIX)/bin/tcmt +# $(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt +# $(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt +# +# $(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper +# utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt +# chmod a+x $(PREFIX)/bin/tcmt $(PREFIX)/bin/mt_laststep : utils/mt_laststep $(INSTALL) $< $@ chmod a+x $@ @@ -359,22 +384,22 @@ $(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \ fi install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ - $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ - $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \ + $(PREFIX)/share/db/mt-pg.sql \ $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \ $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \ $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \ - $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 -# $(PREFIX)/bin/.$(ARCHSTR)/ndboard + $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 $(PREFIX)/bin/serialize-env +# $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/tcmt +# $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ # $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : mkdir -p $(PREFIX)/bin/.$(ARCHSTR) mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib @@ -471,35 +496,42 @@ fi altdb.scm : echo ";; optional alternate db setup" > altdb.scm echo "(define *available-db* (make-hash-table))" >> altdb.scm - if csi -ne '(use mysql-client)';then \ + if csi -ne '(use mysql-client)' &> /dev/null;then \ echo "(use mysql-client)(hash-table-set! *available-db* 'mysql #t)" >> altdb.scm; \ fi - if csi -ne '(use postgresql)';then \ + if csi -ne '(use postgresql)'&> /dev/null;then \ echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\ fi + if csi -ne '(import mysql-client)'&> /dev/null;then \ + echo "(import mysql-client)(hash-table-set! *available-db* 'mysql #t)" >> altdb.scm; \ + fi + if csi -ne '(import postgresql)'&> /dev/null;then \ + echo "(import postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\ + fi + +# portlogger-example : portlogger-example.scm api.o archive.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o synchash.o tasks.o tdb.o tests.o tree.o +# csc $(CSCOPTS) portlogger-example.scm api.o archive.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o sync-hash.o tasks.o tdb.o tests.o tree.o + +unitdeps.dot : *scm ./utils/plot-uses Makefile + ./utils/plot-uses todot commonmod.import,mtargs.import,mtargs,debugprint *.scm > unitdeps.dot + +unitdeps.pdf : unitdeps.dot + dot unitdeps.dot -Tpdf -o unitdeps.pdf -portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o synchash.o tasks.o tdb.o tests.o tree.o - csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o synchash.o tasks.o tdb.o tests.o tree.o +./utils/plot-uses : utils/plot-uses.scm + csc utils/plot-uses.scm # create a pdf dot graphviz diagram from notations in rmt.scm rmt.pdf : rmt.scm grep ';;DOT' rmt.scm | sed -e 's/.*;;DOT //' > rmt.dot;dot -Tpdf rmt.dot -o rmt.pdf buildmanual: cd docs/manual && make -wikipage=plan -editwiki: - cd docs/manual && ../../utils/editwiki $(wikipage) - -viewmanual: - arora docs/manual/megatest_manual.html - targets: @grep : Makefile | perl -ne '/^([A-Za-z0-9_-]+):/ && print "$$1\n"' - unit : cd tests;make unit Index: TODO ================================================================== --- TODO +++ TODO @@ -16,10 +16,13 @@ # along with Megatest. If not, see . TODO ==== +23WW07 +. Remove use of *dbstruct-dbs* + WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling WW16 Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -1,7 +1,5 @@ - - ;;====================================================================== ;; Copyright 2006-2013, Matthew Welland. ;; ;; This file is part of Megatest. ;; @@ -18,21 +16,30 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== -(use srfi-69 posix) - (declare (unit api)) -(declare (uses rmt)) (declare (uses db)) +(declare (uses debugprint)) +(declare (uses commonmod)) (declare (uses dbmod)) (declare (uses dbfile)) (declare (uses tasks)) +(declare (uses tcp-transportmod)) +(import commonmod) (import dbmod) (import dbfile) +(import debugprint) +(import tcp-transportmod) + +(use srfi-69 + srfi-18 + posix + matchable + s11n) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs @@ -39,10 +46,11 @@ get-var get-keys get-key-vals test-toplevel-num-items get-test-info-by-id + get-test-state-status-by-id get-steps-info-by-id get-data-info-by-id test-get-rundir-from-test-id get-count-tests-running-for-testname get-count-tests-running @@ -59,12 +67,12 @@ get-run-info get-run-status get-run-state get-run-stats get-run-times - get-targets get-target + get-targets ;; register-run get-tests-tags get-test-times get-tests-for-run get-tests-for-run-state-status @@ -86,11 +94,10 @@ read-test-data-varpatt login tasks-get-last testmeta-get-record have-incompletes? - ;; synchash-get get-changed-record-ids get-run-record-ids get-not-completed-cnt)) (define api:write-queries @@ -126,11 +133,11 @@ ;; TEST DATA test-data-rollup csv->test-data ;; MISC - sync-inmem->db + sync-cachedb->db drop-all-triggers create-all-triggers update-tesdata-on-repilcate-db ;; TESTMETA @@ -141,262 +148,335 @@ tasks-add tasks-set-state-given-param-key )) (define *db-write-mutexes* (make-hash-table)) - -;; These are called by the server on recipt of /api calls -;; - keep it simple, only return the actual result of the call, i.e. no meta info here -;; -;; - returns #( flag result ) -;; -(define (api:execute-requests dbstruct dat) - (db:open-no-sync-db) ;; sets *no-sync-db* -;; (handle-exceptions -;; exn -;; (let ((call-chain (get-call-chain))) -;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) -;; (print-call-chain (current-error-port)) -;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens - (if (> *api-process-request-count* 200) - (begin - (if (common:low-noise-print 30 "too many threads") - (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay.")) - (thread-sleep! 0.5) ;; take a nap - )) - (cond - ((not (vector? dat)) ;; it is an error to not receive a vector - (vector #f (vector #f "remote must be called with a vector"))) - #;((> *api-process-request-count* 200) ;; 20) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") - (set! *server-overloaded* #t) - (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! - (else - (let* ((cmd-in (vector-ref dat 0)) - (cmd (if (symbol? cmd-in) - cmd-in - (string->symbol cmd-in))) - (params (vector-ref dat 1)) - (run-id (if (null? params) - 0 - (car params))) - (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id) - (hash-table-ref *db-write-mutexes* run-id) - (let* ((newmutex (make-mutex))) - (hash-table-set! *db-write-mutexes* run-id newmutex) - newmutex))) - (start-t (current-milliseconds)) - (readonly-mode (dbr:dbstruct-read-only dbstruct)) - (readonly-command (member cmd api:read-only-queries)) - (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) - (if (not readonly-command) - (mutex-lock! write-mutex)) - (let* ((res - (if writecmd-in-readonly-mode - (conc "attempt to run write command "cmd" on a read-only database") - (case cmd - ;;=============================================== - ;; READ/WRITE QUERIES - ;;=============================================== - - ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl - - ;; SERVERS - ((start-server) (apply server:kind-run params)) - ((kill-server) (set! *server-run* #f)) - - ;; TESTS - - ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) - ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. - ((test-set-state-status-by-id) - - ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - (db:set-state-status-and-roll-up-items - dbstruct - (list-ref params 0) ; run-id - (list-ref params 1) ; test-name - #f ; item-path - (list-ref params 2) ; state - (list-ref params 3) ; status - (list-ref params 4) ; comment - )) - - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) - ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) - ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) - ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) - ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) - - ;; RUNS - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((update-run-stats) (apply db:update-run-stats dbstruct params)) - ((set-var) (apply db:set-var dbstruct params)) - ((inc-var) (apply db:inc-var dbstruct params)) - ((dec-var) (apply db:dec-var dbstruct params)) - ((del-var) (apply db:del-var dbstruct params)) - ((add-var) (apply db:add-var dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) - ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - - ;; MISC - ((sync-inmem->db) (let ((run-id (car params))) - (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) - ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) - ((create-all-triggers) (db:create-all-triggers dbstruct)) - ((drop-all-triggers) (db:drop-all-triggers dbstruct)) - - ;; TESTMETA - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - ((get-tests-tags) (db:get-tests-tags dbstruct)) - - ;; TASKS - ((tasks-add) (apply tasks:add dbstruct params)) - ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) - ((tasks-get-last) (apply tasks:get-last dbstruct params)) - - ;; NO SYNC DB - ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) - ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) - ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) - ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) - - ;; ARCHIVES - ;; ((archive-get-allocations) - ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) - ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) - ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) - - ;;====================================================================== - ;; READ ONLY QUERIES - ;;====================================================================== - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - ((get-key-vals) (apply db:get-key-vals dbstruct params)) - ((get-target) (apply db:get-target dbstruct params)) - ((get-targets) (db:get-targets dbstruct)) - - ;; ARCHIVES - ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) - - ;; TESTS - ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) - ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) - ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) - ;; ((synchash-get) (apply synchash:server-get dbstruct params)) - ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) - ((get-test-times) (apply db:get-test-times dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((get-run-status) (apply db:get-run-status dbstruct params)) - ((get-run-state) (apply db:get-run-state dbstruct params)) - ((set-run-status) (apply db:set-run-status dbstruct params)) - ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) - ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) - ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) - ((get-num-runs) (apply db:get-num-runs dbstruct params)) - ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) - ((get-var) (apply db:get-var dbstruct params)) - ((get-run-stats) (apply db:get-run-stats dbstruct params)) - ((get-run-times) (apply db:get-run-times dbstruct params)) - - ;; STEPS - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) - ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) - - ;; TEST DATA - ((read-test-data) (apply db:read-test-data dbstruct params)) - ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params)) - ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) - - ;; MISC - ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) - ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:general-call dbstruct run-id stmtname realparams))) - ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) - ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) - ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - - ;; TASKS - ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) - (else - (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) - (conc "ERROR: BAD api call " cmd)))))) - (if (not readonly-command) - (mutex-unlock! write-mutex)) - - ;; save all stats - (let ((delta-t (- (current-milliseconds) - start-t)) - (modified-cmd (if (eq? cmd 'general-call) - (string->symbol (conc "general-call-" (car params))) - cmd))) - (hash-table-set! *db-api-call-time* modified-cmd - (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '())))) - (if writecmd-in-readonly-mode - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #t))) - (vector #f res)) - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #f))) - (vector #t res)))))))) +(define *server-signature* #f) +;; ;; These are called by the server on recipt of /api calls +;; ;; - keep it simple, only return the actual result of the call, i.e. no meta info here +;; ;; +;; ;; - returns #( flag result ) +;; ;; +;; (define (api:execute-requests dbstruct dat) +;; (if (> *api-process-request-count* 50) +;; (begin +;; (if (common:low-noise-print 30 "too many threads") +;; (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay.")) +;; ;; (thread-sleep! 0.5) ;; take a nap - no, the napping is moved to the clients via tt:backoff-incr +;; )) +;; (cond +;; ((not (vector? dat)) ;; it is an error to not receive a vector +;; (vector #f (vector #f "remote must be called with a vector"))) +;; (else +;; (let* ((cmd-in (vector-ref dat 0)) +;; (cmd (if (symbol? cmd-in) +;; cmd-in +;; (string->symbol cmd-in))) +;; (params (vector-ref dat 1)) +;; (run-id (if (null? params) +;; 0 +;; (car params))) +;; (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id) +;; (hash-table-ref *db-write-mutexes* run-id) +;; (let* ((newmutex (make-mutex))) +;; (hash-table-set! *db-write-mutexes* run-id newmutex) +;; newmutex))) +;; (start-t (current-milliseconds)) +;; (readonly-mode (dbr:dbstruct-read-only dbstruct)) +;; (readonly-command (member cmd api:read-only-queries)) +;; (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) +;; (if (not readonly-command) +;; (mutex-lock! write-mutex)) +;; (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) +;; (clean-run-id (cond +;; ((number? run-id) run-id) +;; ((equal? run-id #f) "main") +;; (else "other"))) +;; (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params))) +;; (res +;; (if writecmd-in-readonly-mode +;; (conc "attempt to run write command "cmd" on a read-only database") +;; (api:dispatch-request dbstruct cmd run-id params)))) +;; (delete-file* crumbfile) +;; (if (not readonly-command) +;; (mutex-unlock! write-mutex)) +;; +;; ;; save all stats +;; (let ((delta-t (- (current-milliseconds) +;; start-t)) +;; (modified-cmd (if (eq? cmd 'general-call) +;; (string->symbol (conc "general-call-" (car params))) +;; cmd))) +;; (hash-table-set! *db-api-call-time* modified-cmd +;; (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '())))) +;; (if writecmd-in-readonly-mode +;; (begin +;; #;(common:telemetry-log (conc "api-out:"(->string cmd)) +;; payload: `((params . ,params) +;; (ok-res . #t))) +;; (vector #f res)) +;; (begin +;; #;(common:telemetry-log (conc "api-out:"(->string cmd)) +;; payload: `((params . ,params) +;; (ok-res . #f))) +;; (vector #t res)))))))) + +;; indat is (cmd run-id params meta) +;; +;; WARNING: Do not print anything in the lambda of this function as it +;; reads/writes to current in/out port +;; +(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params) + (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.") + (if (not *server-signature*) + (set! *server-signature* (tt:mk-signature *toppath*))) + (lambda (indat) + (let* (;; (indat (deserialize)) + (newcount (+ *api-process-request-count* 1)) + (delay-wait (if (> newcount 10) + (- newcount 10) + 0)) + (normal-proc (lambda (cmd run-id params) + (case cmd + ((ping) *server-signature*) + (else + (api:dispatch-request dbstruct cmd run-id params)))))) + (set! *api-process-request-count* newcount) + (set! *db-last-access* (current-seconds)) + (match indat + ((cmd run-id params meta) + (let* ((db-ok (let* ((dbfname (dbmod:run-id->dbfname run-id)) + (ok (equal? dbfname (dbr:dbstruct-dbfname dbstruct)))) + (case cmd + ((ping) #t) ;; we are fine + (else + (if (not ok)(debug:print 0 *default-log-port* "ERROR: "cmd", run-id "run-id", not correct for dbfname "(dbr:dbstruct-dbfname dbstruct))) + (assert ok "FATAL: database file and run-id not aligned."))))) + (ttdat *server-info*) + (server-state (tt-state ttdat)) + (status (cond + ((> newcount 3) 'busy) + ;; ((> newcount 5) 'loaded) ;; this gets transmitted to the client which calls tt:backoff-incr to slow stuff down. + (else 'ok))) + (errmsg (case status + ((busy) (conc "Server overloaded, "newcount" threads in flight")) + ((loaded) (conc "Server loaded, "newcount" threads in flight")) + (else #f))) + (result (case status + ((busy) + (if (eq? cmd 'ping) + (normal-proc cmd run-id params) + ;; newcount must be greater than 5 for busy + (* 0.25 (- newcount 3)) ;; was 15 + )) ;; (- newcount 29)) ;; call back in as many seconds + ((loaded) +;; (if (eq? (rmt:transport-mode) 'tcp) +;; (thread-sleep! 0.5)) + (normal-proc cmd run-id params)) + (else + (normal-proc cmd run-id params)))) + (meta (case cmd + ((ping) `((sstate . ,server-state))) + (else `((wait . ,delay-wait))))) + (payload (list status errmsg result meta))) + (set! *api-process-request-count* (- *api-process-request-count* 1)) + ;; (serialize payload) + payload)) + (else + (assert #f "FATAL: failed to deserialize indat "indat)))))) + + +(define (api:dispatch-request dbstruct cmd run-id params) + (if (not *no-sync-db*) + (db:open-no-sync-db)) + (case cmd + ;;=============================================== + ;; READ/WRITE QUERIES + ;;=============================================== + + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl + + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! *server-run* #f)) + + ;; TESTS + + ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. + ((test-set-state-status-by-id) + + ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + (db:set-state-status-and-roll-up-items + dbstruct + (list-ref params 0) ; run-id + (list-ref params 1) ; test-name + #f ; item-path + (list-ref params 2) ; state + (list-ref params 3) ; status + (list-ref params 4) ; comment + )) + + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) + ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) + ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) + + ((insert-test) (db:insert-test dbstruct run-id params)) + + ;; RUNS + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((update-run-stats) (apply db:update-run-stats dbstruct params)) + ((set-var) (apply db:set-var dbstruct params)) + ((inc-var) (apply db:inc-var dbstruct params)) + ((dec-var) (apply db:dec-var dbstruct params)) + ((del-var) (apply db:del-var dbstruct params)) + ((add-var) (apply db:add-var dbstruct params)) + + ((insert-run) (apply db:insert-run dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + + ;; MISC + ((sync-cachedb->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) + ((get-toplevels-and-incompletes) (apply db:get-toplevels-and-incompletes dbstruct params)) + ((mark-incomplete) #f);;(thread-start! (make-thread (lambda () ;; no need to block on this one + ;; (apply db:find-and-mark-incomplete dbstruct params) + ;; #t)))) + ((create-all-triggers) (db:create-all-triggers dbstruct)) + ((drop-all-triggers) (db:drop-all-triggers dbstruct)) + + ;; TESTMETA + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) + ((get-tests-tags) (db:get-tests-tags dbstruct)) + + ;; TASKS + ((tasks-add) (apply tasks:add dbstruct params)) + ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) + ((tasks-get-last) (apply tasks:get-last dbstruct params)) + + ;; NO SYNC DB + ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) + ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) + ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) + ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) + + ;; ARCHIVES + ;; ((archive-get-allocations) + ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) + ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) + ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) + + ;;====================================================================== + ;; READ ONLY QUERIES + ;;====================================================================== + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + ((get-key-vals) (apply db:get-key-vals dbstruct params)) + ((get-target) (apply db:get-target dbstruct params)) + ((get-targets) (db:get-targets dbstruct)) + + ;; ARCHIVES + ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((get-test-state-status-by-id) (apply db:get-test-state-status-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) + ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) + ((get-test-times) (apply db:get-test-times dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((get-run-state) (apply db:get-run-state dbstruct params)) + ((get-run-state-status) (apply db:get-run-state-status dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) + ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) + ((get-num-runs) (apply db:get-num-runs dbstruct params)) + ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((get-var) (apply db:get-var dbstruct params)) + ((get-run-stats) (apply db:get-run-stats dbstruct params)) + ((get-run-times) (apply db:get-run-times dbstruct params)) + + ;; STEPS + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) + ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) + + ;; TEST DATA + ((read-test-data) (apply db:read-test-data dbstruct params)) + ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params)) + ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) + + ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) + ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:general-call dbstruct run-id stmtname realparams))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) + ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + + ;; TASKS + ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) + (else + (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) + (conc "ERROR: BAD api call " cmd)))) ;; http-server send-response ;; api:process-request ;; db:* ;; Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -18,17 +18,15 @@ ;;====================================================================== (declare (unit apimod)) (declare (uses commonmod)) -(declare (uses ulex)) (module apimod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod) -(import (prefix ulex ulex:)) ) Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -16,15 +16,25 @@ ;; along with Megatest. If not, see . ;; ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 format md5 message-digest srfi-18) - (declare (unit archive)) (declare (uses db)) +(declare (uses debugprint)) +(declare (uses mtargs)) (declare (uses common)) +(declare (uses commonmod)) +(declare (uses rmtmod)) + +(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 + format md5 message-digest srfi-18) + +(import commonmod + debugprint + rmtmod + (prefix mtargs args:)) (include "common_records.scm") (include "db_records.scm") ;;====================================================================== ADDED artifacts.scm Index: artifacts.scm ================================================================== --- /dev/null +++ artifacts.scm @@ -0,0 +1,24 @@ +;;====================================================================== +;; Copyright 2019, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit artifacts)) + +(include "artifacts/artifacts.scm") + ADDED artifacts/README Index: artifacts/README ================================================================== --- /dev/null +++ artifacts/README @@ -0,0 +1,1 @@ +NOTE: keep megatest/artifacts/ in sync with datastore/artifacts ADDED artifacts/artifacts.meta Index: artifacts/artifacts.meta ================================================================== --- /dev/null +++ artifacts/artifacts.meta @@ -0,0 +1,21 @@ +;; -*- scheme -*- +( +; Your egg's license: +(license "BSD") + +; Pick one from the list of categories (see below) for your egg and enter it +; here. +(category db) + +; A list of eggs pkts depends on. If none, you can omit this declaration +; altogether. If you are making an egg for chicken 3 and you need to use +; procedures from the `files' unit, be sure to include the `files' egg in the +; `needs' section (chicken versions < 3.4.0 don't provide the `files' unit). +; `depends' is an alias to `needs'. +;; (needs (autoload "3.0")) + +; A list of eggs required for TESTING ONLY. See the `Tests' section. +(test-depends test) + +(author "Matt Welland") +(synopsis "A sha1-chain based datastore similar to the data format in fossil scm, consisting of artifacts of single line cards.")) ADDED artifacts/artifacts.release-info Index: artifacts/artifacts.release-info ================================================================== --- /dev/null +++ artifacts/artifacts.release-info @@ -0,0 +1,3 @@ +(repo fossil "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}") +(uri zip "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}/zip/{egg-name}.zip?uuid={egg-release}") +(release "1.0") ADDED artifacts/artifacts.scm Index: artifacts/artifacts.scm ================================================================== --- /dev/null +++ artifacts/artifacts.scm @@ -0,0 +1,1624 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of artifacts +;; +;; Pkts is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Pkts is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Pkts. If not, see . +;; + +;; CARDS: +;; +;; A card is a line of text, the first two characters are a letter followed by a +;; space. The letter is the card type. +;; +;; artifact: +;; +;; An artifact is a sorted list of cards with a final card Z that contains the shar1 hash +;; of all of the preceding cards. +;; +;; AARTIFACT: +;; +;; An alist mapping card types to card data +;; '((T . "artifacttype") +;; (a . "some content")) +;; +;; EARTIFACT: +;; +;; Extended packet using friendly keys. Must use a artifactspec to convert to/from eartifacts +;; '((ptype . "artifacttype") +;; (adata . "some content)) +;; +;; DARTIFACT: +;; +;; artifacts pulled from the database have this format: +;; +;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist +;; (t . "v1.63/tip/dev") +;; (c . "QUICKPATT") +;; (T . "runstart") +;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd") +;; (D . "1488995096.0")) +;; (id . 8) +;; (group-id . 0) +;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b") +;; (parent . "") +;; (artifact-type . "runstart") +;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b")) +;; +;; artifactspec is alist of alists mapping types and nicekeys to keys +;; +;; '((posting . ((title . t) +;; (url . u) +;; (blurb . b))) +;; (comment . ((comment . c) +;; (score . s)))) + +;; Reserved cards: +;; P : artifact parent +;; R : reference artifact containing mapping of short string -> sha1sum strings +;; T : artifact type +;; D : current time from (current-time), unless provided +;; Z : shar1 hash of the packet + +;; Example usage: +;; +;; Create a artifact: +;; +;; (use artifacts) +;; (define-values (uuid artifact) +;; (alist->artifact +;; '((fruit . "apple") (meat . "beef")) ;; this is the data to convert +;; '((foods (fruit . f) (meat . m))) ;; this is the artifact spec +;; ptype: +;; 'foods)) +;; +;; Add to artifact queue: +;; +;; (define db (open-queue-db "/tmp/artifacts" "artifacts.db")) +;; (add-to-queue db artifact uuid 'foods #f 0) ;; no parent and use group_id of 0 +;; +;; Retrieve the packet from the db and extract a value: +;; +;; (alist-ref +;; 'meat +;; (dartifact->alist +;; (car (get-dartifacts db #f 0 #f)) +;; '((foods (fruit . f) +;; (meat . m))))) +;; => "beef" +;; + +(module artifacts +( +;; cards, util and misc +;; sort-cards +;; calc-sha1 +;; +;; low-level constructor procs, exposed only for development/testing, will be removed +construct-sdat +construct-artifact +card->type/value +add-z-card + +;; queue database procs +open-queue-db +add-to-queue +create-and-queue +;; lookup-by-uuid +lookup-by-id +get-dartifacts +get-not-processed-artifacts +get-related +find-artifacts +process-artifacts +get-descendents +get-ancestors +get-artifacts +;; get-last-descendent +;; with-queue-db +;; load-artifacts-to-db + +;; procs that operate directly on artifacts, sdat, aartifacts, dartifacts etc. +artifact->alist ;; artifact -> aartifact (i.e. alist) +artifact->sdat ;; artifact -> '("a aval" "b bval" ...) +sdat->alist ;; '("a aval" "b bval"...) -> ((a . "aval")(b . "bval") ...) +dblst->dartifacts ;; convert list of tuples from queue db into dartifacts +dartifact->alist ;; flatten a dartifact into an alist containing all db fields and the artifact alist +dartifacts->alists ;; apply dartifact->alist to a list of alists using a artifact-spec +alist->artifact ;; returns two values uuid, artifact +get-value ;; looks up a value given a key in a dartifact +flatten-all ;; merge the list of values from a query which includes a artifact into a flat alist <== really useful! +check-artifact + +;; artifact alists +write-alist->artifact +read-artifact->alist + +;; archive database +;; archive-open-db +;; write-archive-artifacts +;; archive-artifacts +;; mark-processed + +;; artifactsdb +artifactdb-conn ;; useful +artifactdb-fname +artifactsdb-open +artifactsdb-close +artifactsdb-add-record +;; temporary +artifactdb-artifactspec + +;; utility procs +increment-string ;; used to get indexes for strings in ref artifacts +make-report ;; make a .dot file +calc-sha1 +uuid-first-two-letters +uuid-remaining-letters + +;; file and directory utils +multi-glob +capture-dir +file-get-sha1 +check-same +link-or-copy +same-partition? +link-if-same-partition +archive-copy +write-to-archive +artifact-rollup +read-artifacts-into-hash +hash-of-artifacts->bundle +archive-dest + +;; pathname-full-filename + +;; minimal artifact functions +minimal-artifact-read +minimal-artifact->alist +afact-get-D +afact-get-Z +afact-get-T +afact-get +afact-get-number/default + + +;; bundles +write-bundle +read-bundle + +;; new artifacts db +with-todays-adb +get-all-artifacts +refresh-artifacts-db + +) + +(import (chicken base) scheme (chicken process) (chicken time posix) + (chicken io) (chicken file) (chicken pathname) + chicken.process-context.posix (chicken string) + (chicken time) (chicken sort) (chicken file posix) (chicken condition) srfi-1 + regex srfi-13 srfi-69 (chicken port) (chicken process-context) + crypt sha1 matchable message-digest sqlite3 typed-records + directory-utils + scsh-process) + +;;====================================================================== +;; DATA MANIPULATION UTILS +;;====================================================================== + +(define-inline (unescape-data data) + (string-translate* data '(("\\n" . "\n") ("\\\\" . "\\")))) + +(define-inline (escape-data data) + (string-translate* data '(("\n" . "\\n") ("\\" . "\\\\")))) + +(define-inline (make-card type data) + (conc type " " (escape-data (->string data)))) + +;; reverse an alist for doing artifactkey -> external key conversions +;; +(define-inline (reverse-aspec aspec) + (map (lambda (dat) + (cons (cdr dat)(car dat))) + aspec)) + +;; add a card to the list of cards, sdat +;; if type is #f return only sdat +;; if data is #f return only sdat +;; +(define-inline (add-card sdat type data) + (if (and type data) + (cons (make-card type data) sdat) + sdat)) + +;;====================================================================== +;; STRING AS FUNKY NUMBER +;;====================================================================== + +;; NOTE: PTDZ are removed as they are reserved. NB// the R card is not used in a +;; ref, instead the P parent card is used. +;; Question: Why does it matter to remove PTDZ? +;; To make the ref easier to use the ref strings will be the keys +;; so we cannot have overlap with any actual keys. But this is a +;; bit silly. What we need to do instead is reject keys of length +;; one where the char is in PTDZ +;; +;; This is basically base92 +;; +(define string-num-chars (string->list "!#$%&'()*+,-./0123456789:;<=>?@ABCEFGHIJKLMNOQRSUVWXY[\\]^_abcdefghijklmnopqrstuvwxyz{|}~")) +;; "0123456789abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOQSUVWXY!#$%&'()*+,-./[]:;<=>?\\^_{}|")) + +(define (char-incr inchar) + (let* ((carry #f) + (next-char (let ((rem (member inchar string-num-chars))) + (if (eq? (length rem) 1) ;; we are at the last character in our string-num-chars list + (begin + (set! carry #t) + (car string-num-chars)) + (cadr rem))))) + (values next-char carry))) + +(define (increment-string str) + (if (string-null? str) + "0" + (let ((strlst (reverse (string->list str)))) ;; need to process the string from the lsd + (list->string + (let loop ((hed (car strlst)) + (tal (cdr strlst)) + (res '())) + (let-values (((newhed carry)(char-incr hed))) + ;; (print "newhed: " newhed " carry: " carry " tal: " tal) + (let ((newres (cons newhed res))) + (if carry ;; we'll have to propagate the carry + (if (null? tal) ;; at the end, tack on "0" (which is really a "1") + (cons (car string-num-chars) newres) + (loop (car tal)(cdr tal) newres)) + (append (reverse tal) newres))))))))) + +;;====================================================================== +;; P K T S D B I N T E R F A C E +;; +;; INTEGER, REAL, TEXT +;;====================================================================== +;; +;; spec +;; ( (tablename1 . (field1name L1 TYPE) +;; (field2name L2 TYPE) ... ) +;; (tablename2 ... )) +;; +;; Example: (tests (testname n TEXT) +;; (rundir r TEXT) +;; ... ) +;; +;; artifact keys are taken from the first letter, if that is not unique +;; then look at the next letter and so on +;; + +;; simplify frequent need to get one result with default +;; +(define (get-one db default qry . params) + (apply fold-row + car + default + db + qry + params)) + +(define (get-rows db qry . params) + (apply fold-row + cons + db + qry + params)) + +;; use this struct to hold the artifactspec and the db handle +;; +(defstruct artifactdb + (fname #f) + (artifactsdb-spec #f) + (artifactspec #f) ;; cache the artifactspec + (field-keys #f) ;; cache the field->key mapping (field1 . k1) ... + (key-fields #f) ;; cache the key->field mapping + (conn #f) + ) + +;; WARNING: There is a simplification in the artifactsdb spec w.r.t. artifactspec. +;; The field specs are the cdr of the table list - not a full +;; list. The extra list level in artifactspec is gratuitous and should +;; be removed. +;; +(define (artifactsdb-spec->artifactspec tables-spec) + (map (lambda (tablespec) + (list (car tablespec) + (map (lambda (field-spec) + (cons (car field-spec)(cadr field-spec))) + (cdr tablespec)))) + tables-spec)) + +(define (artifactsdb-open dbfname artifactsdb-spec) + (let* ((pdb (make-artifactdb)) + (dbexists (file-exists? dbfname)) + (db (open-database dbfname))) + (artifactdb-artifactsdb-spec-set! pdb artifactsdb-spec) + (artifactdb-artifactspec-set! pdb (artifactsdb-spec->artifactspec artifactsdb-spec)) + (artifactdb-fname-set! pdb dbfname) + (artifactdb-conn-set! pdb db) + (if (not dbexists) + (artifactsdb-init pdb)) + pdb)) + +(define (artifactsdb-init artifactsdb) + (let* ((db (artifactdb-conn artifactsdb)) + (artifactsdb-spec (artifactdb-artifactsdb-spec artifactsdb))) + ;; create a table for the artifacts themselves + (execute db "CREATE TABLE IF NOT EXISTS artifacts (id INTEGER PRIMARY KEY, zkey TEXT, record_id INTEGER, artifact TEXT);") + (for-each + (lambda (table) + (let* ((table-name (car table)) + (fields (cdr table)) + (stmt (conc "CREATE TABLE IF NOT EXISTS " + table-name + " (id INTEGER PRIMARY KEY," + (string-intersperse + (map (lambda (fieldspec) + (conc (car fieldspec) " " + (caddr fieldspec))) + fields) + ",") + ");"))) + (execute db stmt))) + artifactsdb-spec))) + +;; create artifact from the data and insert into artifacts table +;; +;; data is assoc list of (field . value) ... +;; tablename is a symbol matching the table name +;; +(define (artifactsdb-add-record artifactsdb tablename data #!optional (parent #f)) + (let*-values (((zkey artifact) (alist->artifact data (artifactdb-artifactspec artifactsdb) ptype: tablename))) + ;; have the data as alist so insert it into appropriate table also + (let* ((db (artifactdb-conn artifactsdb))) + ;; TODO: Address collisions + (execute db "INSERT INTO artifacts (zkey,artifact,record_id) VALUES (?,?,?);" + zkey artifact -1) + (let* (;; (artifactid (artifactsdb-artifactkey->artifactid artifactsdb artifactkey)) + (record-id (artifactsdb-insert artifactsdb tablename data))) + (execute db "UPDATE artifacts SET record_id=? WHERE zkey=?;" + record-id zkey) + )))) + +;; +(define (artifactsdb-insert artifactsdb tablename data) + (let* ((db (artifactdb-conn artifactsdb)) + (stmt (conc "INSERT INTO " tablename + " (" (string-intersperse (map conc (map car data)) ",") + ") VALUES ('" + ;; TODO: Add lookup of data type and do not + ;; wrap integers with quotes + (string-intersperse (map conc (map cdr data)) "','") + "');"))) + (print "stmt: " stmt) + (execute db stmt) + ;; lookup the record-id and return it + + )) + +(define (artifactsdb-close artifactsdb) + (finalize! (artifactdb-conn artifactsdb))) + +;; (let loop ((s "0")(n 0))(print s)(if (< n 5000)(loop (increment-string s)(+ n 1)))) + +;;====================================================================== +;; CARDS, MISC and UTIL +;;====================================================================== + +;; given string (likely multi-line) "dat" return shar1 hash +;; +(define (calc-sha1 instr) + (message-digest-string + (sha1-primitive) + instr)) + +;; given a single card return its type and value +;; +(define (card->type/value card) + (let ((ctype (substring card 0 1)) + (cval (substring card 2 (string-length card)))) + (values (string->symbol ctype) cval))) + +;;====================================================================== +;; SDAT procs +;; sdat is legacy/internal usage. Intention is to remove sdat calls from +;; the exposed calls. +;;====================================================================== + +;; sort list of cards +;; +(define-inline (sort-cards sdat) + (sort sdat string<=?)) + +;; artifact rules +;; 1. one card per line +;; 2. at least one card +;; 3. no blank lines + +;; given sdat, a list of cards return uuid, packet (as sdat) +;; +(define (add-z-card sdat) + (let* ((sorted-sdat (sort-cards sdat)) + (dat (string-intersperse sorted-sdat "\n")) + (uuid (calc-sha1 dat))) + (values + uuid + (conc + dat + "\nZ " + uuid)))) + +(define (check-artifact artifact) + (handle-exceptions + exn + #f ;; anything goes wrong - call it a crappy artifact + (let* ((sdat (string-split artifact "\n")) + (rdat (reverse sdat)) ;; reversed + (zdat (car rdat)) + (Z (cadr (string-split zdat))) + (cdat (string-intersperse (reverse (cdr rdat)) "\n"))) + (equal? Z (calc-sha1 cdat))))) + +;;====================================================================== +;; AARTIFACTs +;;====================================================================== + +;; convert a sdat (list of cards) to an alist +;; +(define (sdat->alist sdat) + (let loop ((hed (car sdat)) + (tal (cdr sdat)) + (res '())) + (let-values (( (ctype cval)(card->type/value hed) )) + ;; if this card is not one of the common ones tack it on to rem + (let* ((oldval (alist-ref ctype res)) + (newres (cons (cons ctype + (if oldval ;; list or string + (if (list? oldval) + (cons cval oldval) + (cons cval (list oldval))) + cval)) + res))) + (if (null? tal) + newres + (loop (car tal)(cdr tal) newres)))))) + +;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist +;; (t . "v1.63/tip/dev") +;; (c . "QUICKPATT") +;; (T . "runstart") +;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd") +;; (D . "1488995096.0")) +;; (id . 8) +;; (group-id . 0) +;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b") +;; (parent . "") +;; (artifact-type . "runstart") +;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b")) +;; +;; artifactspec is alist of alists mapping types and nicekeys to keys +;; +;; '((posting . ((title . t) +;; (url . u) +;; (blurb . b))) +;; (comment . ((comment . c) +;; (score . s)))) + +;; DON'T USE? +;; +(define (get-value field dartifact . spec-in) + (if (null? spec-in) + (alist-ref field dartifact) + (let* ((spec (car spec-in)) + (aartifact (alist-ref 'aartifact dartifact))) ;; get the artifact alist + (if (and aartifact spec) + (let* ((ptype (alist-ref 'artifact-type dartifact)) + (pspec (alist-ref (string->symbol ptype) spec))) ;; do we have a spec for this type of artifact + (and pspec + (let* ((key (alist-ref field pspec))) + (and key (alist-ref key aartifact))))) + #f)))) + +;; convert a dartifact to a pure alist given a artifactspec +;; this flattens out the alist to include the data from +;; the queue database record +;; +(define (dartifact->alist dartifact artifactspec) + (let* ((aartifact (alist-ref 'aartifact dartifact)) + (artifact-type (or (alist-ref 'artifact-type dartifact) ;; artifact-type is from the database field artifact_type + (alist-ref 'T aartifact))) + (artifact-fields (alist-ref (string->symbol artifact-type) artifactspec)) + (rev-fields (if artifact-fields + (reverse-aspec artifact-fields) + '()))) + (append (map (lambda (entry) + (let* ((artifact-key (car entry)) + (new-key (or (alist-ref artifact-key rev-fields) artifact-key))) + `(,new-key . ,(cdr entry)))) + aartifact) + dartifact))) + +;; convert a list of dartifacts into a list of alists using artifact-spec +;; +(define (dartifacts->alists dartifacts artifact-spec) + (map (lambda (x) + (dartifact->alist x artifact-spec)) + dartifacts)) + +;; Generic flattener, make the tuple and artifact into a single flat alist +;; +;; qry-result-spec is a list of symbols corresponding to each field +;; +(define (flatten-all inlst artifactspec . qry-result-spec) + (map + (lambda (tuple) + (dartifact->alist + (apply dblst->dartifacts tuple qry-result-spec) + artifactspec)) + inlst)) + +;; call like this: +;; (construct-sdat 'a "a data" 'S "S data" ...) +;; returns list of cards +;; ( "A a value" "D 12345678900" ...) +;; +(define (construct-sdat . alldat) + (let ((have-D-card #f)) ;; flag + (if (even? (length alldat)) + (let loop ((type (car alldat)) + (data (cadr alldat)) + (tail (cddr alldat)) + (res '())) + (if (eq? type 'D)(set! have-D-card #t)) + (if (null? tail) + (if have-D-card ;; return the constructed artifact, add a D card if none found + (add-card res type data) + (add-card + (add-card res 'D (current-seconds)) + type data)) + (loop (car tail) + (cadr tail) + (cddr tail) + (add-card res type data)))) + #f))) ;; #f means it failed to create the sdat + +(define (construct-artifact . alldat) + (add-z-card + (apply construct-sdat alldat))) + +;;====================================================================== +;; CONVERTERS +;;====================================================================== + +(define (artifact->sdat artifact) + (map unescape-data (string-split artifact "\n"))) + +;; given a pure artifact return an alist +;; +(define (artifact->alist artifact #!key (artifactspec #f)) + (let ((sdat (cond + ((string? artifact) (artifact->sdat artifact)) + ((list? artifact) artifact) + (else #f)))) + (if artifact + (if artifactspec + (dartifact->alist (list (cons 'aartifact (sdat->alist sdat))) artifactspec) + (sdat->alist sdat)) + #f))) + +;; convert an alist to an sdat +;; in: '((a . "blah")(b . "foo")) +;; out: '("a blah" "b foo") +;; +(define (alist->sdat adat) + (map (lambda (dat) + (conc (car dat) " " (cdr dat))) + adat)) + +;; adat is the incoming alist, aspec is the mapping +;; from incoming key to the artifact key (usually one +;; letter to keep data tight) see the artifactspec at the +;; top of this file +;; +;; NOTE: alists can contain multiple instances of the same key (supported fine by artifacts) +;; but you (obviously I suppose) cannot use alist-ref to access those entries. +;; +(define (alist->artifact adat aspec #!key (ptype #f)(no-d #f)) + (let* ((artifact-type (or ptype + (alist-ref 'T adat) ;; can provide in the incoming alist + #f)) + (artifact-spec (if artifact-type ;; alist of external-key -> key + (or (alist-ref artifact-type aspec) '()) + (if (null? aspec) + '() + (cdar aspec)))) ;; default to first one if nothing specified + (new-alist (map (lambda (dat) + (let* ((key (car dat)) + (val (cdr dat)) + (newkey (or (alist-ref key artifact-spec) + key))) + (cons newkey (escape-data (conc val))))) ;; convert all incoming data (symbols, numbers etc.) to a string and then escape newlines. + adat)) + (new-with-type (if (alist-ref 'T new-alist) + new-alist + (cons `(T . ,artifact-type) new-alist))) + (with-d-card (if (or no-d ;; no timestamp wanted + (alist-ref 'D new-with-type)) + new-with-type + (cons `(D . ,(current-seconds)) + new-with-type)))) + (add-z-card + (alist->sdat with-d-card)))) + +;;====================================================================== +;; D B Q U E U E I N T E R F A C E +;;====================================================================== + +;; artifacts ( +;; id SERIAL PRIMARY KEY, +;; uuid TEXT NOT NULL, +;; parent_uuid TEXT default '', +;; artifact_type INTEGER DEFAULT 0, +;; group_id INTEGER NOT NULL, +;; artifact TEXT NOT NULL + +;; schema is list of SQL statements - can be used to extend db with more tables +;; +(define (open-queue-db dbpath dbfile #!key (schema '())) + (let* ((dbfname (conc dbpath "/" dbfile)) + (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f))) + (db (open-database dbfname))) + ;; (set-busy-handler! (dbi:db-conn db) (busy-timeout 10000)) + (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. + (for-each + (lambda (stmt) + (execute db stmt)) + (cons "CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER PRIMARY KEY, + group_id INTEGER NOT NULL, + uuid TEXT NOT NULL, + parent_uuid TEXT TEXT DEFAULT '', + artifact_type TEXT NOT NULL, + artifact TEXT NOT NULL, + processed INTEGER DEFAULT 0)" + schema))) ;; 0=not processed, 1=processed, 2... for expansion + db)) + +(define (add-to-queue db artifact uuid artifact-type parent-uuid group-id) + (execute db "INSERT INTO artifacts (uuid,parent_uuid,artifact_type,artifact,group_id) + VALUES(?,?,?,?,?);" ;; $1,$2,$3,$4,$5);" + uuid + (if parent-uuid parent-uuid "");; use null string as placeholder for no parent uuid. + (if artifact-type (conc artifact-type) "") + artifact + group-id)) + +;; given all needed parameters create a artifact and store it in the queue +;; procs is an alist that maps artifact-type to a function that takes a list of artifact params +;; in data and returns the uuid and artifact +;; +(define (create-and-queue conn procs artifact-type parent-uuid group-id data) + (let ((proc (alist-ref artifact-type procs))) + (if proc + (let-values (( (uuid artifact) (proc data) )) + (add-to-queue conn artifact uuid artifact-type parent-uuid group-id) + uuid) + #f))) + +;; given uuid get artifact, if group-id is specified use it (reduces probablity of +;; being messed up by a uuid collision) +;; +(define (lookup-by-uuid db artifact-uuid group-id) + (if group-id + (get-one db "SELECT artifact FROM artifacts WHERE group_id=? AND uuid=?;" group-id artifact-uuid) + (get-one db "SELECT artifact FROM artifacts WHERE uuid=?;" artifact-uuid))) + +;; find a packet by its id +;; +(define (lookup-by-id db id) + (get-one db "SELECT artifact FROM artifacts WHERE id=?;" id)) + + +;;====================================================================== +;; P R O C E S S P K T S +;;====================================================================== + +;; given a list of field values pulled from the queue db generate a list +;; of dartifact's +;; +(define (dblst->dartifacts lst . altmap) + (let* ((maplst (if (null? altmap) + '(id group-id uuid parent artifact-type artifact processed) + altmap)) + (res (map cons maplst lst))) ;; produces list of pairs, i.e an alist + (cons `(aartifact . ,(artifact->alist (alist-ref 'artifact res))) + res))) + +;; NB// ptypes is a list of symbols, '() or #f find all types +;; +(define (get-dartifacts db ptypes group-id parent-uuid #!key (uuid #f)) + (let* ((ptype-qry (if (and ptypes + (not (null? ptypes))) + (conc " IN ('" (string-intersperse (map conc ptypes) "','") "')") + (conc " LIKE '%' "))) + (rows (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE artifact_type " ptype-qry " AND group_id=? + AND processed=0 " + (if parent-uuid (conc "AND parent_uuid='" parent-uuid "' ") "") + (if uuid (conc "AND uuid='" uuid "' ") "") + "ORDER BY id DESC;") + group-id))) + (map dblst->dartifacts (map vector->list rows)))) + +;; get N artifacts not yet processed for group-id +;; +(define (get-not-processed-artifacts db group-id artifact-type limit offset) + (map dblst->dartifacts + (map vector->list + (get-rows + db + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE artifact_type = ? AND group_id = ? AND processed=0 + LIMIT ? OFFSET ?;" + (conc artifact-type) ;; convert symbols to string + group-id + limit + offset + )))) + +;; given a uuid, get not processed child artifacts +;; +(define (get-related db group-id uuid) + (map dblst->dartifacts + (get-rows + db + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE parent_uuid=? AND group_id=? AND processed=0;" + uuid group-id))) + +;; generic artifact processor +;; +;; find all packets in group-id of type in ptypes and apply proc to artifactdat +;; +(define (process-artifacts conn group-id ptypes parent-uuid proc) + (let* ((artifacts (get-dartifacts conn ptypes group-id parent-uuid))) + (map proc artifacts))) + +;; criteria is an alist ((k . valpatt) ...) +;; - valpatt is a regex +;; - ptypes is a list of types (symbols expected) +;; match-type: 'any or 'all +;; +(define (find-artifacts db ptypes criteria #!key (processed #f)(match-type 'any)(artifact-spec #f)) ;; processed=#f, don't use, else use + (let* ((artifacts (get-dartifacts db ptypes 0 #f)) + (match-rules (lambda (artifactdat) ;; returns a list of matching rules + (filter (lambda (c) + ;; (print "c: " c) + (let* ((ctype (car c)) ;; card type + (rx (cdr c)) ;; card pattern + ;; (t (alist-ref 'artifact-type artifactdat)) + (artifact (alist-ref 'artifact artifactdat)) + (aartifact (artifact->alist artifact)) + (cdat (alist-ref ctype aartifact))) + ;; (print "cdat: " cdat) ;; " aartifact: " aartifact) + (if cdat + (string-match rx cdat) + #f))) + criteria))) + (res (filter (lambda (artifactdat) + (if (null? criteria) ;; looking for all artifacts + #t + (case match-type + ((any)(not (null? (match-rules artifactdat)))) + ((all)(eq? (length (match-rules artifactdat))(length criteria))) + (else + (print "ERROR: bad match type " match-type ", expecting any or all."))))) + artifacts))) + (if artifact-spec + (dartifacts->alists res artifact-spec) + res))) + +;; get descendents of parent-uuid +;; +;; NOTE: Should be doing something like the following: +;; +;; given a uuid, get not processed child artifacts +;; processed: +;; #f => get all +;; 0 => get not processed +;; 1 => get processed +;; +(define (get-ancestors db group-id uuid #!key (processed #f)) + (map dblst->dartifacts + (map vector->list + (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed + FROM artifacts + WHERE uuid IN + (WITH RECURSIVE + tree(uuid,parent_uuid) + AS + ( + SELECT uuid, parent_uuid + FROM artifacts + WHERE uuid = ? + UNION ALL + SELECT t.uuid, t.parent_uuid + FROM artifacts t + JOIN tree ON t.uuid = tree.parent_uuid + ) + SELECT uuid FROM tree) + AND group_id=?" (if processed (conc " AND processed=" processed) "") ";") + uuid group-id)))) + +;; Untested +;; +(define (get-descendents db group-id uuid #!key (processed #f)) + (map dblst->dartifacts + (map vector->list + (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed + FROM artifacts + WHERE uuid IN + (WITH RECURSIVE + tree(uuid,parent_uuid) + AS + ( + SELECT uuid, parent_uuid + FROM artifacts + WHERE uuid = ? + UNION ALL + SELECT t.uuid, t.parent_uuid + FROM artifacts t + JOIN tree ON t.parent_uuid = tree.uuid + ) + SELECT uuid FROM tree) + AND group_id=?" (if processed (conc " AND processed=" processed) "") ";") + uuid group-id)))) + +;; look up descendents based on given info unless passed in a list via inlst +;; +;; (define (get-last-descendent db group-id uuid #!key (processed #f)(inlst #f)) +;; (let ((descendents (or inlst (get-descendents db group-id uuid processed: processed)))) +;; (if (null? descendents) +;; #f +;; (last descendents)))) + +;;====================================================================== +;; A R C H I V E S - always to a sqlite3 db +;;====================================================================== + +;; open an archive db +;; path: archive-dir//month.db +;; +#;(define (archive-open-db archive-dir) + (let* ((curr-time (seconds->local-time (current-seconds))) + (dbpath (conc archive-dir "/" (time->string curr-time "%Y"))) + (dbfile (conc dbpath "/" (time->string curr-time "%m") ".db")) + (dbexists (if (file-exists? dbfile) #t (begin (create-directory dbpath #t) #f)))) + (let ((db (open-database dbfile))) + ;; (set-busy-handler! db (busy-timeout 10000)) + (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. + (execute db "CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER, + group_id INTEGER, + uuid TEXT, + parent_uuid TEXT, + artifact_type TEXT, + artifact TEXT, + processed INTEGER DEFAULT 0)")) + db))) + +;; turn on transactions! otherwise this will be painfully slow +;; +#;(define (write-archive-artifacts src-db db artifact-ids) + (let ((artifacts (get-rows + src-db + (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact FROM artifacts WHERE id IN (" + (string-intersperse (map conc artifact-ids) ",") ")")))) + ;; (dbi:with-transaction + ;; db + (lambda () + (for-each + (lambda (artifact) + (apply execute db "INSERT INTO artifacts (id,group_id,uuid,parent_uuid,artifact_type,artifact) + VALUES (?,?,?,?,?,?)" + artifact)) + artifacts)))) ;; ) + +;; given a list of uuids and lists of uuids move all to +;; the sqlite3 db for the current archive period +;; +#;(define (archive-artifacts conn artifact-ids archive-dir) + (let ((db (archive-open-db archive-dir))) + (write-archive-artifacts conn db artifact-ids) + (finalize! db)) + ;; (pg:with-transaction + ;; conn + ;; (lambda () + (for-each + (lambda (id) + (get-one + conn + "DELETE FROM artifacts WHERE id=?" id)) + artifact-ids)) ;; )) + +;; given a list of ids mark all as processed +;; +(define (mark-processed conn artifact-ids) + ;; (pg:with-transaction + ;; conn + ;; (lambda () + (for-each + (lambda (id) + (get-one + conn + "UPDATE artifacts SET processed=1 WHERE id=?;" id)) + artifact-ids)) ;; x)) + +;; a generic artifact getter, gets from the artifacts db +;; +(define (get-artifacts conn ptypes) + (let* ((ptypes-str (if (null? ptypes) + "" + (conc " WHERE artifact_type IN ('" (string-intersperse ptypes ",") "') "))) + (qry-str (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts" ptypes-str))) + (map vector->list (get-rows conn qry-str)))) + +;; make a report of the artifacts in the db +;; ptypes of '() gets all artifacts +;; display-fields +;; +(define (make-report dest conn artifactspec display-fields . ptypes) + (let* (;; (conn (dbi:db-conn (s:db))) + (all-rows (get-artifacts conn ptypes)) + (all-artifacts (flatten-all + all-rows + artifactspec + 'id 'group-id 'uuid 'parent 'artifact-type 'artifact 'processed)) + (by-uuid (let ((ht (make-hash-table))) + (for-each + (lambda (artifact) + (let ((uuid (alist-ref 'uuid artifact))) + (hash-table-set! ht uuid artifact))) + all-artifacts) + ht)) + (by-parent (let ((ht (make-hash-table))) + (for-each + (lambda (artifact) + (let ((parent (alist-ref 'parent artifact))) + (hash-table-set! ht parent (cons artifact (hash-table-ref/default ht parent '()))))) + all-artifacts) + ht)) + (oup (if dest (open-output-file dest) (current-output-port)))) + + (with-output-to-port + oup + (lambda () + (print "digraph megatest_state_status { + // ranksep=0.05 + rankdir=LR; + node [shape=\"box\"]; +") + ;; first all the names + (for-each + (lambda (artifact) + (let* ((uuid (alist-ref 'uuid artifact)) + (shortuuid (substring uuid 0 4)) + (type (alist-ref 'artifact-type artifact)) + (processed (alist-ref 'processed artifact))) + + (print "\"" uuid "\" [label=\"" shortuuid ", (" + type ", " + (if processed "processed" "not processed") ")") + (for-each + (lambda (key-field) + (let ((val (alist-ref key-field artifact))) + (if val + (print key-field "=" val)))) + display-fields) + (print "\" ];"))) + all-artifacts) + ;; now for parent-child relationships + (for-each + (lambda (artifact) + (let ((uuid (alist-ref 'uuid artifact)) + (parent (alist-ref 'parent artifact))) + (if (not (equal? parent "")) + (print "\"" parent "\" -> \"" uuid"\";")))) + all-artifacts) + + (print "}") + )) + (if dest + (begin + (close-output-port oup) + (system "dot -Tpdf out.dot -o out.pdf"))) + + )) + +;;====================================================================== +;; Read ref artifacts into a vector < laststr hash table > +;;====================================================================== + + + +;;====================================================================== +;; Read/write packets to files (convience functions) +;;====================================================================== + +;; write alist to a artifact file +;; +(define (write-alist->artifact targdir dat #!key (artifactspec '())(ptype #f)) + (let-values (((uuid artifact)(alist->artifact dat artifactspec ptype: ptype))) + (with-output-to-file (conc targdir "/" uuid ".artifact") + (lambda () + (print artifact))) + uuid)) ;; return the uuid + +;; read artifact into alist +;; +(define (read-artifact->alist artifact-file #!key (artifactspec #f)) + (artifact->alist (with-input-from-file + artifact-file + read-string) + artifactspec: artifactspec)) + +;;====================================================================== +;; File utils, stuff useful for file management +;;====================================================================== + +(define (file-get-sha1 fname) + (let* ((sha1-res (run/strings (sha1sum ,fname)))) + (car (string-split (car sha1-res))))) + +(define (link-or-copy srcf destf) + (or (handle-exceptions + exn + #f + (file-link srcf destf)) + (if (file-exists? destf) + (print "NOTE: destination already exists, skipping copy.") + (copy-file srcf destf)))) + +;; (define (files-diff file1 file2) +;; (let* ((diff-res (with-input-from-port +;; (run/port (diff "-q" ,file1 ,file2)) +;; (lambda () +;; (let* ((res (read-line))) +;; (read-lines) +;; res))))) +;; (car (string-split sha1-res)))) +;; + + +(define (check-same file1 file2) + (cond + ((not (and (file-exists? file1)(file-exists? file2))) #f) + ((not (equal? (file-size file1)(file-size file2))) #f) + (else + (let-values (((status run-ok process-id) + (run (diff "-q" ,file1 ,file2)))) + status)))) + +(define *pcache* (make-hash-table)) +(define (get-device dir) + (let ((indat (or (hash-table-ref/default *pcache* dir #f) + (let* ((inp (open-input-pipe (conc "df --output=source \""dir"\""))) + (res (read-lines inp))) + (close-input-port inp) + (hash-table-set! *pcache* dir res) + res)))) + (cadr indat))) + +(define (same-partition? dir1 dir2) + (equal? (get-device dir1)(get-device dir2))) + +(define (link-if-same-partition file1 file2) + (let* ((dir1 (pathname-directory file1)) + (dir2 (pathname-directory file2)) + (f1 (pathname-file file1)) + (f2 (pathname-file file2))) + (if (same-partition? dir1 dir2) + (let* ((tmpname (conc "."f2"-"(current-seconds)))) + ;; this steps needs to be executed as actual user + (move-file file2 (conc dir1 "/" tmpname)) + (file-link file1 file2) + (delete-file (conc dir1 "/" tmpname)))))) + +(define (uuid-first-two-letters sha1sum) + (substring sha1sum 0 2)) + +(define (uuid-remaining-letters sha1sum) + (let ((slen (string-length sha1sum))) + (substring sha1sum 2 slen))) + +(define (archive-dest destd sha1sum) + (let* ((subdir (uuid-first-two-letters sha1sum)) ;; (substring sha1sum 0 2)) + ;; (slen (string-length sha1sum)) + (rem sha1sum #;(uuid-remaining-letters sha1sum)) ;; (substring sha1sum 3 slen)) + (full-dest-dir (conc destd"/"subdir)) + (full-dest-file (conc full-dest-dir"/"rem))) + (if (not (directory-exists? full-dest-dir)) + (create-directory full-dest-dir #t)) + full-dest-file)) + +(define (write-to-archive data destd #!optional (nextnum #f)) + (let* ((sha1sum (calc-sha1 data)) + (full-dest (conc (archive-dest destd sha1sum) + (if nextnum (conc "."nextnum) "")))) + (if (file-exists? full-dest) + (if (equal? (string-intersperse (with-input-from-file full-dest read-lines) "\n") + data) + (begin + ;; (print "INFO: data already exists in "full-dest" and is identical") + sha1sum) + (let ((nextnum (if nextnum (+ nextnum 1) 0))) + (print "WARN: data already exists in "full-dest" but is different! Trying again...") + (write-to-archive data destd nextnum))) + (begin + (with-output-to-file + full-dest + (lambda () + (print data))) + sha1sum)))) ;; BUG? Does print munge data? + +;; copy srcf with sha1sum aabc... to aa/bc... +;; +(define (archive-copy srcf destd sha1sum) + (let* ((full-dest-file (archive-dest destd sha1sum))) + (let loop ((trynum 0)) + (let ((dest-name (if (> trynum 0) + (conc full-dest-file"-"trynum) + full-dest-file))) + (cond + ((not (file-exists? srcf)) #f) ;; this should be an error? + ((and (file-exists? srcf) + (file-exists? dest-name)) + (if (check-same srcf dest-name) + (link-if-same-partition dest-name srcf) + (loop (+ trynum 1)))) ;; collisions are rare, this protects against them + ((not (file-exists? dest-name)) + (link-or-copy srcf dest-name)) + (else #f)))))) + +;; multi-glob +(define (multi-glob globstrs inpath) + ;; (print "multi-glob: "globstrs", "inpath) + (if (equal? inpath "") + globstrs + (let* ((parts (string-split inpath "/" #t)) + (nextpart (car parts)) + (remaining (string-intersperse (cdr parts) "/"))) + (if (and (equal? nextpart "") ;; this must be a leading / meaning root directory + (null? globstrs)) + (multi-glob '("/") remaining) + (begin + ;; (print "nextpart="nextpart", remaining="remaining) + (apply append + (map (lambda (gstr) + (let* ((pathstr (conc gstr"/"nextpart)) + (pathstrs (glob pathstr))) + ;; (print "pathstr="pathstr) + (multi-glob pathstrs remaining))) + globstrs))))))) + + +;; perm[/user:group]: +;; DDD - octal perm (future expansion) +;; - - use umask/defacto perms (i.e. don't actively do anything) +;; x - mark as executable +;; +;; Cards: +;; file: f perm fname +;; directory: d perm fname artifactid +;; link: l perm lname destpath +;; +;; NOTE: cards are kept as (C . "value") +;; +;; given a directory path, ignore list and artifact store (hash-table): +;; 1. create sha1 tree at dest (e.g. aa/b3a7 ...) +;; 2. create artifact for each dir +;; - cards for all files +;; - cards for files that are symlinks or executables +;; 3. return (artifactid . artifact) +;; +;; NOTES: +;; Use destdir of #f to not create sha1 tree +;; Hard links will be used if srcdir and destdir appear to be same partion +;; +;; (alist->artifact adat aspec #!key (ptype #f)) +;; +;; +;; (load "../../artifacts/artifacts.scm")(import big-chicken srfi-69 artifacts)(define dirdat (make-hash-table)) +;; (capture-dir ".." ".." "/tmp/junk" '() dirdat) +;; +;; [procedure] (file-type FILE [LINK [ERROR]]) +;; Returns the file-type for FILE, which should be a filename, a file-descriptor or a port object. If LINK is given and true, symbolic-links are not followed: +;; +;; regular-file +;; directory +;; fifo +;; socket +;; symbolic-link +;; character-device +;; block-device +;; Note that not all types are supported on every platform. If ERROR is given and false, then file-type returns #f if the file does not exist; otherwise, it signals an error. +;; +;; +(define (capture-dir curr-dir src-dir dest-dir ignore-list artifacts all-seen) + (let* ((dir-dat (directory-fold + (lambda (fname res) ;; res is a list of artifact cards + (let* ((fullname (conc curr-dir"/"fname))) + ;; (print "INFO: processing "fullname) + (if (hash-table-ref/default all-seen fullname #f) ;; something circular going on + (begin + (print "WARNING: possible circular link(s) "fullname) + res) + (let* ((ftype (file-type fullname #t #f))) + (hash-table-set! all-seen fullname ftype) + (cons + (case ftype ;; get the card + ((directory) ;; (directory? fullname) + (let* ((new-curr-dir (conc curr-dir"/"fname)) + (new-src-dir (conc src-dir"/"fname))) + (let* ((dir-dat (capture-dir new-curr-dir new-src-dir + dest-dir ignore-list artifacts all-seen)) + (a-id (car dir-dat)) + (artf (cdr dir-dat))) + (hash-table-set! artifacts a-id artf) + (cons 'd (conc "- "a-id" "fname))))) ;; the card + ((symbolic-link) ;; (symbolic-link? fullname) + (let ((ldest (read-symbolic-link fullname))) + (cons 'l (conc "- "fname"/"ldest)))) ;; delimit link name from dest with / + ((regular-file) ;; must be a file + (let* ((start (current-seconds)) + (sha1sum (file-get-sha1 fullname)) + (perms (if (file-executable? fullname) "x" "-"))) + (let ((runtime (- (current-seconds) start))) + (if (> runtime 1) + (print "INFO: file "fullname" took "runtime" seconds to calculate sha1."))) + (if dest-dir + (archive-copy fullname dest-dir sha1sum)) + (cons 'f (conc perms " "sha1sum" "fname)))) + (else + (print "WARNING: file "fullname" of type "ftype" is NOT supported and will converted to empty file.") + (let* ((sha1sum (write-to-archive "" dest-dir))) + (cons 'f (conc "- "sha1sum" "fname))))) + res))))) + '() src-dir #:dotfiles? #t))) ;; => (values srcdir_artifact sub_artifacts_list) + ;; (print "dir-dat: " dir-dat) + (let-values (((a-id artf) + (alist->artifact dir-dat '() ptype: 'd no-d: #t))) + (hash-table-set! artifacts a-id artf) + (cons a-id artf)))) + +;; maybe move this into artifacts? +;; +;; currently moves *.artifact into a bundle and moves the artifacts into attic +;; future: move artifacts under 1 meg in size into bundle up to 10 meg in size +;; +(define (artifact-rollup bundle-dir) ;; cfg storepath) + ;; (let* ((bundle-dir (calc-bundle-dir cfg storepath))) + (let* ((bundles (glob (conc bundle-dir"/*.bundle"))) + (artifacts (glob (conc bundle-dir"/*.artifact")))) + (if (> (length artifacts) 30) ;; rollup only if > 30 artifacts + ;; if we have unbundled artifacts, bundle them + (let* ((ht (read-artifacts-into-hash #f artifacts: artifacts)) + (bundle (hash-of-artifacts->bundle ht))) + (write-bundle bundle bundle-dir) + (create-directory (conc bundle-dir"/attic") #t) + (for-each + (lambda (full-fname) + (let* ((fname (pathname-strip-directory full-fname)) + (newname (conc bundle-dir"/attic/"fname))) + (move-file full-fname newname #t))) + artifacts) + (conc "bundled "(length artifacts))) + "not enough artifacts to bundle"))) + +;; if destfile is a directory then calculate the sha1sum of the bundle and store it +;; by .bundle +;; +;; incoming dat is pure text (bundle already sorted and appended: +;; +(define (write-bundle bdl-data destdir) + (let* ((bdl-uuid (calc-sha1 bdl-data))) + (with-output-to-file + (conc destdir"/"bdl-uuid".bundle") + (lambda () + (print bdl-data))))) + +;; minimal (and hopefully fast) artifact reader +;; TODO: Add check of shar sum. +;; +(define (minimal-artifact-read fname) + (let* ((indat (with-input-from-file fname read-lines))) + (if (null? indat) + (values #f (conc "did not find an artifact in "fname)) + (let* ((zcard (last indat)) + (cardk (substring zcard 0 1)) + (cardv (substring zcard 2 (string-length zcard)))) + (if (equal? cardk "Z") + (values cardv (string-intersperse indat "\n")) + (values #f (conc fname" is not a valid artifact"))))))) + +;; read artifacts from directory into hash +;; NOTE: support for max-count not implemented yet +;; +(define (read-artifacts-into-hash dir #!key (artifacts #f) (max-count #f)(ht #f)) + (let* ((artifacts (or artifacts + (glob (conc dir"/*.artifact")))) + (ht (or ht (make-hash-table)))) + (for-each + (lambda (fname) + (let-values (((uuid afct) + (minimal-artifact-read fname))) + (hash-table-set! ht uuid afct))) + artifacts) + ht)) + +;; ht is: +;; uuid => artifact text +;; use write-bundle to put result into a bundle file +;; +(define (hash-of-artifacts->bundle ht) + (fold (lambda (k res) + (let* ((v (hash-table-ref ht k))) + (if res + (conc res"\n"v) + v))) + #f + (sort (hash-table-keys ht) string<=?))) + +;; minimal artifact to alist +;; +(define (minimal-artifact->alist afact) + (let* ((lines (string-split afact "\n"))) + (map (lambda (a) + (let* ((key (string->symbol (substring a 0 1))) + (sl (string-length a)) + (val (if (> sl 2) + (substring a 2 sl) + ""))) + (cons key val))) + lines))) + +;; some accessors for common cards +(define (afact-get-D afact) + (let ((dval (alist-ref 'D afact))) + (if dval + (string->number dval) + #f))) + +(define (afact-get-T afact) ;; get the artifact type as a symbol + (let ((val (alist-ref 'T afact))) + (if val + (string->symbol val) + val))) + +(define (afact-get-Z afact) + (alist-ref 'Z afact)) + +(define (afact-get afact key default) + (or (alist-ref key afact) + default)) + +(define (afact-get-number/default afact key default) + (let ((val (alist-ref key afact))) + (if val + (or (string->number val) default) ;; seems wrong + default))) + +;; bundles are never big and reading into memory for processing is fine +;; +(define (read-bundle srcfile #!optional (mode 'uuid-raw)) + (let* ((indat (with-input-from-file srcfile read-lines))) + (let loop ((tail indat) + (dat '()) ;; artifact being extracted + (res '())) ;; list of artifacts + (if (null? tail) + (reverse res) ;; last dat should be empty list + (let* ((curr-line (car tail))) + (let-values (((ctype cdata) + (card->type/value curr-line))) + (let* ((is-z-card (eq? 'Z ctype)) + (new-dat (cons (case mode + ((uuid-raw) curr-line) + (else (cons ctype cdata))) + dat))) + (if is-z-card + (loop (cdr tail) ;; done with this artifact + '() + (cons (case mode + ((uuid-raw) (cons cdata (string-intersperse (reverse new-dat) "\n"))) + (else (reverse new-dat))) + res)) + (loop (cdr tail) + new-dat + res))))))))) + + +;; find all .bundle and .artifacts files in bundle-dir +;; and inport them into sqlite handle adb +;; +(define (refresh-artifacts-db adb bundle-dir) + (let* ((bundles (glob (conc bundle-dir"/*.bundle"))) + (artifacts (glob (conc bundle-dir"/*.artifact"))) + (uuids (get-all-uuids adb 'hash))) + (with-transaction + adb + (lambda () + (for-each + (lambda (bundle-file) + ;; (print "Importing artifacts from "bundle-file) + (let* ((bdat (read-bundle bundle-file 'uuid-raw)) + (count 0) + (inc (lambda ()(set! count (+ count 1))))) + (for-each + (lambda (adat) + (match + adat + ((zval . artifact) + (if (not (hash-table-exists? uuids zval)) + (begin + ;; (print "INFO: importing new artifact "zval" from bundle "bundle-file) + (inc) + (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);" + zval artifact) + (hash-table-set! uuids zval #t)))) + (else + (print "ERROR: Bad artifact data "adat)))) + bdat) + (print "INFO: imported "count" artifacts from "bundle-file))) + bundles) + (for-each + (lambda (artifact-file) + ;; (print "Importing artifact from "artifact-file) + (let-values (((uuid artifact) (minimal-artifact-read artifact-file))) + (if uuid + (if (not (hash-table-exists? uuids uuid)) + (begin + ;; (print "INFO: importing new artifact "uuid" from "artifact-file) + (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);" + uuid artifact) + (hash-table-set! uuids uuid #t))) + (print "Bad artifact in "artifact-file)))) + artifacts))))) + +;;====================================================================== +;; Artifacts db cache +;;====================================================================== + +;; artifacts +;; id SERIAL PRIMARY KEY, +;; uuid TEXT NOT NULL, +;; artifact TEXT NOT NULL +;; +;; parents +;; id INTEGER REFERENCES artids.id, -- +;; parent_id REFERENCES artids.id +;; +;; schema is list of SQL statements - can be used to extend db with more tables +;; +(define (open-artifacts-db dbpath dbfile #!key (schema '())) + (let* ((dbfname (conc dbpath "/" dbfile)) + (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f))) + (adb (open-database dbfname))) + (set-busy-handler! adb (make-busy-timeout 10000)) + (execute adb "PRAGMA synchronous = 0;") + (if (not dbexists) + (with-transaction + adb + (lambda () + (for-each + (lambda (stmt) + (execute adb stmt)) + (append `("CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER PRIMARY KEY, + uuid TEXT NOT NULL, + artifact TEXT NOT NULL)" + + "CREATE TABLE IF NOT EXISTS parents + (id INTEGER REFERENCES artifacts(id) NOT NULL, + parent_id INTEGER REFERENCES artifacts(id) NOT NULL)") + schema))))) + adb)) + +(define (generate-year-month-name #!optional (seconds #f)) + (let* ((curr-time (seconds->local-time (or seconds (current-seconds))))) + (time->string curr-time "%Y%m"))) + +;; I don't like this function. TODO: remove the +;; mode and option to return ht. Use instead the +;; get-all-artifacts below +;; +(define (get-all-uuids adb #!optional (mode #f)) + (let* ((res (fold-row + (lambda (res uuid) + (cons uuid res)) + '() + adb + "SELECT uuid FROM artifacts;"))) + (case mode + ((hash) + (let* ((ht (make-hash-table))) + (for-each + (lambda (uuid) + (hash-table-set! ht uuid #t)) + res) + ht)) + (else res)))) + +;; returns raw artifacts (i.e. NOT alists but instead plain text) +(define (get-all-artifacts adb) + (let* ((ht (make-hash-table))) + (for-each-row + (lambda (id uuid artifact) + (hash-table-set! ht uuid `(,id ,uuid ,artifact))) + adb + "SELECT id,uuid,artifact FROM artifacts;") + ht)) + +;; given a bundle-dir copy or create to /tmp and open +;; the YYMM.db file and hand the handle to the given proc +;; NOTE: we operate in /tmp/ to accomodate users on NFS +;; where slamming Unix locks at an NFS filer can cause +;; locking fails. Eventually this /tmp behavior will be +;; configurable. +;; +(define (with-todays-adb bundle-dir proc) + (let* ((dbname (conc (generate-year-month-name) ".db")) + (destname (conc bundle-dir"/"dbname)) + (tmparea (conc "/tmp/"(current-user-name)"-"(calc-sha1 bundle-dir))) + (tmpname (conc tmparea"/"dbname)) + (lockfile (conc destname".update-in-progress"))) + ;; (print "with-todays-adb, bundle-dir: "bundle-dir", dbname: "dbname", destname: "destname",\n tmparea: " tmparea", lockfile: "lockfile) + (if (not (file-exists? tmparea))(create-directory tmparea #t)) + (let loop ((count 0)) + (if (file-exists? lockfile) + (if (< count 30) ;; aproximately 30 seconds + (begin + (sleep 1) + (loop (+ 1 count))) + (print "ERROR: "lockfile" exists, proceeding anyway")) + (if (file-exists? destname) + (begin + (copy-file destname tmpname #t) + (copy-file destname lockfile #t))))) + (let* ((adb (open-artifacts-db tmparea dbname)) + (res (proc adb))) + (finalize! adb) + (copy-file tmpname destname #t) + (delete-file* lockfile) + res))) + +) ;; module artifacts + +;; ATTIC + ADDED artifacts/artifacts.setup Index: artifacts/artifacts.setup ================================================================== --- /dev/null +++ artifacts/artifacts.setup @@ -0,0 +1,11 @@ +;; Copyright 2007-2017, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;;; pkts.setup +(standard-extension 'pkts "1.0") ADDED artifacts/artifactsrec.scm Index: artifacts/artifactsrec.scm ================================================================== --- /dev/null +++ artifacts/artifactsrec.scm @@ -0,0 +1,196 @@ +(define-syntax define-record-type + (syntax-rules () + ((define-record-type type + (constructor constructor-tag ...) + predicate + (field-tag accessor . more) ...) + (begin + (define type + (make-record-type 'type '(field-tag ...))) + (define constructor + (record-constructor type '(constructor-tag ...))) + (define predicate + (record-predicate type)) + (define-record-field type field-tag accessor . more) + ...)))) + +; An auxilliary macro for define field accessors and modifiers. +; This is needed only because modifiers are optional. + +(define-syntax define-record-field + (syntax-rules () + ((define-record-field type field-tag accessor) + (define accessor (record-accessor type 'field-tag))) + ((define-record-field type field-tag accessor modifier) + (begin + (define accessor (record-accessor type 'field-tag)) + (define modifier (record-modifier type 'field-tag)))))) + +; Record types + +; We define the following procedures: +; +; (make-record-type ) -> +; (record-constructor ) -> +; (record-predicate ) -> +; (record-accessor ) -> +; (record-modifier ) -> +; where +; ( ...) -> +; ( ) -> +; ( ) -> +; ( ) -> + +; Record types are implemented using vector-like records. The first +; slot of each record contains the record's type, which is itself a +; record. + +(define (record-type record) + (record-ref record 0)) + +;---------------- +; Record types are themselves records, so we first define the type for +; them. Except for problems with circularities, this could be defined as: +; (define-record-type :record-type +; (make-record-type name field-tags) +; record-type? +; (name record-type-name) +; (field-tags record-type-field-tags)) +; As it is, we need to define everything by hand. + +(define :record-type (make-record 3)) +(record-set! :record-type 0 :record-type) ; Its type is itself. +(record-set! :record-type 1 ':record-type) +(record-set! :record-type 2 '(name field-tags)) + +; Now that :record-type exists we can define a procedure for making more +; record types. + +(define (make-record-type name field-tags) + (let ((new (make-record 3))) + (record-set! new 0 :record-type) + (record-set! new 1 name) + (record-set! new 2 field-tags) + new)) + +; Accessors for record types. + +(define (record-type-name record-type) + (record-ref record-type 1)) + +(define (record-type-field-tags record-type) + (record-ref record-type 2)) + +;---------------- +; A utility for getting the offset of a field within a record. + +(define (field-index type tag) + (let loop ((i 1) (tags (record-type-field-tags type))) + (cond ((null? tags) + (error "record type has no such field" type tag)) + ((eq? tag (car tags)) + i) + (else + (loop (+ i 1) (cdr tags)))))) + +;---------------- +; Now we are ready to define RECORD-CONSTRUCTOR and the rest of the +; procedures used by the macro expansion of DEFINE-RECORD-TYPE. + +(define (record-constructor type tags) + (let ((size (length (record-type-field-tags type))) + (arg-count (length tags)) + (indexes (map (lambda (tag) + (field-index type tag)) + tags))) + (lambda args + (if (= (length args) + arg-count) + (let ((new (make-record (+ size 1)))) + (record-set! new 0 type) + (for-each (lambda (arg i) + (record-set! new i arg)) + args + indexes) + new) + (error "wrong number of arguments to constructor" type args))))) + +(define (record-predicate type) + (lambda (thing) + (and (record? thing) + (eq? (record-type thing) + type)))) + +(define (record-accessor type tag) + (let ((index (field-index type tag))) + (lambda (thing) + (if (and (record? thing) + (eq? (record-type thing) + type)) + (record-ref thing index) + (error "accessor applied to bad value" type tag thing))))) + +(define (record-modifier type tag) + (let ((index (field-index type tag))) + (lambda (thing value) + (if (and (record? thing) + (eq? (record-type thing) + type)) + (record-set! thing index value) + (error "modifier applied to bad value" type tag thing))))) + +Records + +; This implements a record abstraction that is identical to vectors, +; except that they are not vectors (VECTOR? returns false when given a +; record and RECORD? returns false when given a vector). The following +; procedures are provided: +; (record? ) -> +; (make-record ) -> +; (record-ref ) -> +; (record-set! ) -> +; +; These can implemented in R5RS Scheme as vectors with a distinguishing +; value at index zero, providing VECTOR? is redefined to be a procedure +; that returns false if its argument contains the distinguishing record +; value. EVAL is also redefined to use the new value of VECTOR?. + +; Define the marker and redefine VECTOR? and EVAL. + +(define record-marker (list 'record-marker)) + +(define real-vector? vector?) + +(define (vector? x) + (and (real-vector? x) + (or (= 0 (vector-length x)) + (not (eq? (vector-ref x 0) + record-marker))))) + +; This won't work if ENV is the interaction environment and someone has +; redefined LAMBDA there. + +(define eval + (let ((real-eval eval)) + (lambda (exp env) + ((real-eval `(lambda (vector?) ,exp)) + vector?)))) + +; Definitions of the record procedures. + +(define (record? x) + (and (real-vector? x) + (< 0 (vector-length x)) + (eq? (vector-ref x 0) + record-marker))) + +(define (make-record size) + (let ((new (make-vector (+ size 1)))) + (vector-set! new 0 record-marker) + new)) + +(define (record-ref record index) + (vector-ref record (+ index 1))) + +(define (record-set! record index value) + (vector-set! record (+ index 1) value)) ADDED artifacts/tests/run.scm Index: artifacts/tests/run.scm ================================================================== --- /dev/null +++ artifacts/tests/run.scm @@ -0,0 +1,139 @@ +(use test) + +;; (use (prefix pkts pkts:)) +(use pkts (prefix dbi dbi:)) +;; (use trace)(trace sdat->alist pkt->alist) + +(if (file-exists? "queue.db")(delete-file "queue.db")) + +(test-begin "pkts and pkt archives") + +;;====================================================================== +;; Basic pkt creation, parsing and conversion routines +;;====================================================================== + +(test-begin "basic packets") +(test #f '(A "This is a packet") (let-values (((t v) + (card->type/value "A This is a packet"))) + (list t v))) +(test #f "A A\nZ 664449e7299e0065a3e25c138ccef2df13ba291e" + (let-values (((uuid res) + (add-z-card '("A A")))) + res)) +(test #f '("CC C++" "D 1486332719.0" "a A" "b C")(sort (construct-sdat 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0) + string<=?)) +(define pkt-example #f) +(test #f "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" + (let-values (((uuid res) + (construct-pkt 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0))) + (set! pkt-example (cons uuid res)) + res)) +(test-end "basic packets") + +;;====================================================================== +;; Sqlite and postgresql based queue of pkts +;;====================================================================== + +(test-begin "pkt queue") +(define db #f) +(test #f 'sqlite3 (let ((dbh (open-queue-db "." "queue.db"))) + (set! db dbh) + (dbi:db-dbtype dbh))) +(test #f (cdr pkt-example) + (begin + (add-to-queue db (cdr pkt-example)(car pkt-example) 'basic #f 0) + (lookup-by-uuid db (car pkt-example) 0))) +(test #f (cdr pkt-example) + (lookup-by-id db 1)) +(test #f 1 (length (find-pkts db '(basic) '()))) + +(test-end "pkt queue") + + +;;====================================================================== +;; Process groups of pkts +;;====================================================================== + +(test-begin "lists of packets") +(test #f '((apkt . #f) (id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) ;; ((id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) + (dblst->dpkts '(1 2 3 4 5))) +(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0))) + ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + (get-dpkts db '(basic) 0 #f)) +(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0))) + ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + (get-not-processed-pkts db 0 'basic 1000 0)) +(test-end "lists of packets") + +(test-begin "pkts as alists") +(define pktspec '((posting . ((title . t) ;; NOTE: same as ((posting (title . t)(url . u)(blub . b)) ... + (url . u) + (blurb . b))) + (comment . ((comment . c) + (score . s))) + (basic . ((b-field . b) + (a-field . a))))) +(define pktlst (find-pkts db '(basic) '())) +(define dpkt (car pktlst)) +(test #f "A" (get-value 'a-field dpkt pktspec)) + +(test #f "C" (alist-ref 'b-field (dpkt->alist dpkt pktspec))) + +(define basic-spec '((nada (foo . b)(bar . f))(basic (foo . f)(bar . b)))) +(define test-pkt '((foo . "fooval")(bar . "barval"))) +(let*-values (((u p) (alist->pkt test-pkt basic-spec ptype: 'basic)) + ((apkt) (pkt->alist p)) + ((bpkt) (pkt->alist p pktspec: basic-spec))) + (test #f "fooval" (alist-ref 'f apkt)) + (test #f "fooval" (alist-ref 'foo bpkt)) + (test #f #f (alist-ref 'f bpkt))) + +(test-end "pkts as alists") + +(test-begin "descendents and ancestors") + +(define (get-uuid pkt)(alist-ref 'uuid pkt)) + +;; add a child to 263e +(let-values (((uuid pkt) + (construct-pkt 'x "X" 'y "Y" 'P "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" + 'D "1486332719.0"))) + (add-to-queue db pkt uuid 'basic "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" 0)) + +(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8") + (map (lambda (x)(alist-ref 'uuid x)) + (get-descendents + db 0 + "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + +(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8") + (map (lambda (x)(alist-ref 'uuid x)) + (get-ancestors + db 0 + "818fe30988c9673441b8f203972a8bda6af682f8"))) + +(test-end "descendents and ancestors") + +(test-end "pkts and pkt archives") + +(test-begin "pktsdb") + +(define spec '((tests (testname n TEXT) + (testpath p TEXT) + (duration d INTEGER)))) +;; (define pktsdb (make-pktdb)) +;; (pktdb-pktsdb-spec-set! pktsdb spec) + +(define pktsdb #f) + +(test #f #t (dbi:database? (let ((pdb (pktsdb-open "test.db" spec))) + (set! pktsdb pdb) + (pktdb-conn pdb)))) +;; (pp (pktdb-pktspec pktsdb)) +(test #f #t (pktsdb-add-record pktsdb 'tests '((testname . "test1")))) + +(pktsdb-close pktsdb) + +(test-end "pktsdb") ADDED attic/client.scm Index: attic/client.scm ================================================================== --- /dev/null +++ attic/client.scm @@ -0,0 +1,46 @@ + +;; Copyright 2006-2012, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== +;; C L I E N T S +;;====================================================================== + +(declare (unit client)) + +(declare (uses common)) +(declare (uses db)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +(declare (uses commonmod)) + +(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5 + message-digest matchable spiffy uri-common intarweb http-client + spiffy-request-vars uri-common intarweb directory-utils) + +(import commonmod + debugprint) + +(module client +* + +) + +(import client) + +(include "common_records.scm") +(include "db_records.scm") + ADDED attic/http-transport.scm Index: attic/http-transport.scm ================================================================== --- /dev/null +++ attic/http-transport.scm @@ -0,0 +1,708 @@ + +;; Copyright 2006-2012, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +(declare (unit http-transport)) + +(declare (uses common)) +(declare (uses debugprint)) +(declare (uses db)) +(declare (uses tests)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +(declare (uses server)) +;; (declare (uses daemon)) +(declare (uses portlogger)) +(declare (uses rmt)) +(declare (uses dbfile)) +(declare (uses commonmod)) +(declare (uses mtargs)) + +(module http-transport +* + + +(import srfi-1 posix regex regex-case srfi-69 hostinfo md5 + message-digest posix-extras spiffy uri-common intarweb http-client + spiffy-request-vars intarweb spiffy-directory-listing + (srfi 18) extras tcp s11n) + +(import scheme + chicken + + (prefix mtargs args:) + debugprint) + +;; Configurations for server +(tcp-buffer-size 2048) +(max-connections 2048) + +(include "common_records.scm") +(include "db_records.scm") +(include "js-path.scm") + +(import dbfile commonmod) + +(require-library stml) +(define (http-transport:make-server-url hostport) + (if (not hostport) + #f + (conc "http://" (car hostport) ":" (cadr hostport)))) + +(define *server-loop-heart-beat* (current-seconds)) + +;;====================================================================== +;; S E R V E R +;; ====================================================================== + +;; Call this to start the actual server +;; + +(define *db:process-queue-mutex* (make-mutex)) + +(define (http-transport:run hostn) + ;; Configurations for server + (tcp-buffer-size 2048) + (max-connections 2048) + (debug:print 2 *default-log-port* "Attempting to start the server ...") + (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily + (hostname (get-host-name)) + (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") + (server:get-best-guess-address hostname) + #f))) + (if ipstr ipstr hostn))) ;; hostname))) + (start-port (portlogger:open-run-close portlogger:find-port)) + (link-tree-path (common:get-linktree)) + (tmp-area (common:get-db-tmp-area)) + (start-file (conc tmp-area "/.server-start"))) + (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) + ;; set some parameters for the server + (root-path (if link-tree-path + link-tree-path + (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! + (handle-directory spiffy-directory-listing) + (handle-exception (lambda (exn chain) + (signal (make-composite-condition + (make-property-condition + 'server + 'message "server error"))))) + + ;; http-transport:handle-directory) ;; simple-directory-handler) + ;; Setup the web server and a /ctrl interface + ;; + (vhost-map `(((* any) . ,(lambda (continue) + ;; open the db on the first call + ;; This is were we set up the database connections + (let* (($ (request-vars source: 'both)) + (dat ($ 'dat)) + (res #f)) + (cond + ((equal? (uri-path (request-uri (current-request))) + '(/ "api")) + (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc + headers: '((content-type text/plain))) + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*)) + ((equal? (uri-path (request-uri (current-request))) + '(/ "")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "json_api")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "runs")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ any)) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "hey")) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "jquery3.1.0.js")) + (send-response body: (http-transport:show-jquery) + headers: '((content-type application/javascript)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "test_log")) + (send-response body: (http-transport:html-test-log $) + headers: '((content-type text/HTML)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "dashboard")) + (send-response body: (http-transport:html-dboard $) + headers: '((content-type text/HTML)))) + (else (continue)))))))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) + (with-output-to-file start-file (lambda ()(print (current-process-id))))) + (http-transport:try-start-server ipaddrstr start-port))) + +;; This is recursively run by http-transport:run until sucessful +;; +(define (http-transport:try-start-server ipaddrstr portnum) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) + (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) + (if (not config-use-proxy) + (determine-proxy (constantly #f))) + (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) + (handle-exceptions + exn + (begin + ;; (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + (thread-sleep! 0.1) + + ;; get_next_port goes here + (http-transport:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server")))) + ;; any error in following steps will result in a retry + (set! *server-info* (list ipaddrstr portnum)) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + ;; This starts the spiffy server + ;; NEED WAY TO SET IP TO #f TO BIND ALL + ;; (start-server bind-address: ipaddrstr port: portnum) + (if config-hostname ;; this is a hint to bind directly + (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-") + ;; ipaddrstr + ;; config-hostname)) + (start-server port: portnum)) + (portlogger:open-run-close portlogger:set-port portnum "released") + (debug:print 1 *default-log-port* "INFO: server has been stopped")))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;;====================================================================== +;; C L I E N T S +;;====================================================================== + +(define *http-mutex* (make-mutex)) + +;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here +;; I'm pretty sure it is defunct. + +;; This next block all imported en-mass from the api branch +(define *http-requests-in-progress* 0) +(define *http-connections-next-cleanup* (current-seconds)) + +(define (http-transport:get-time-to-cleanup) + (let ((res #f)) + (mutex-lock! *http-mutex*) + (set! res (> (current-seconds) *http-connections-next-cleanup*)) + (mutex-unlock! *http-mutex*) + res)) + +(define (http-transport:inc-requests-count) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) + ;; Use this opportunity to slow things down iff there are too many requests in flight + (if (> *http-requests-in-progress* 5) + (begin + (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") + (thread-sleep! 1))) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count proc) + (mutex-lock! *http-mutex*) + (proc) + (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count-and-close-all-connections) + (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) + (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds + (if (> *http-requests-in-progress* 0) + (if (> etime (current-seconds)) + (begin + (thread-sleep! 0.05) + (loop etime)) + (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) + (close-all-connections!))) + (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:inc-requests-and-prep-to-close-all-connections) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) + +;; Send "cmd" with json payload "params" to serverdat and receive result +;; +(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3)) + (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat) + (let* ((fullurl (remote-api-req runremote)) + (res (vector #f "uninitialized")) + (success #t) + (sparams (db:obj->string params transport: 'http)) + (server-id (remote-server-id runremote))) + (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) + + ;; set up the http-client here + (max-retry-attempts 1) + ;; consider all requests indempotent + (retry-request? (lambda (request) + #f)) + ;; send the data and get the response + ;; extract the needed info from the http data and + ;; process and return it. + (let* ((send-recieve (lambda () + (mutex-lock! *http-mutex*) + ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) + ;; ((exn http client-error) e (print e))) + (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. + success + (db:string->obj + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (set! success #f) + (if (debug:debug-mode 1) + (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") + (begin + (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") + (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) + (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) + (debug:print 0 *default-log-port* " call-chain: " call-chain))) + ;; what if another thread is communicating ok? Can't happen due to mutex + (http-transport:close-connections runremote) + (mutex-unlock! *http-mutex*) + ;; (close-connection! fullurl) + (db:obj->string #f)) + (with-input-from-request ;; was dat + fullurl + (list (cons 'key (or server-id "thekey")) + (cons 'cmd cmd) + (cons 'params sparams)) + read-string)) + transport: 'http) + 0)) ;; added this speculatively + ;; Shouldn't this be a call to the managed call-all-connections stuff above? + ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections? + (mutex-unlock! *http-mutex*) + )) + (time-out (lambda () + (thread-sleep! 45) + (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") + #f)) + (th1 (make-thread send-recieve "with-input-from-request")) + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (vector-set! res 0 success) + (thread-terminate! th2) + (if (vector? res) + (if (vector-ref res 0) ;; this is the first flag or the second flag? + (let* ((res-dat (vector-ref res 1))) + (if (and (string? res-dat) (string-contains res-dat "server-id mismatch")) + (signal (make-composite-condition + (make-property-condition + 'servermismatch + 'message (vector-ref res 1)))) + res)) ;; this is the *inner* vector? seriously? why? + (if (debug:debug-mode 11) + (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it + (print-call-chain (current-error-port)) + (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 11 *default-log-port* " server call chain:") + (pp (vector-ref res 1) (current-error-port)) + (signal (vector-ref res 0))) + res)) + (signal (make-composite-condition + (make-property-condition + 'timeout + 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) + +;; careful closing of connections stored in *runremote* +;; +(define (http-transport:close-connections runremote) + (if (remote? runremote) + (let ((api-dat (remote-api-uri runremote))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (if (args:any-defined? "-server" "-execute" "-run") + (debug:print-info 0 *default-log-port* "Closing connections to "api-dat)) + (if api-dat (close-connection! api-dat)) + + ;; Would it be better to set *runremote* to #f? I don't think so. But we may + ;; need to clear more of the runremote fields + (remote-api-url-set! runremote #f) ;; used as a flag for connection up and running + + #t)) + #f)) + +;; run http-transport:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (http-transport:keep-running) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + (let* ((servinfofile #f) + (sdat #f) + (no-sync-db (db:open-no-sync-db)) + (tmp-area (common:get-db-tmp-area)) + (started-file (conc tmp-area "/.server-started")) + (server-start-time (current-seconds)) + (server-info (let loop ((start-time (current-seconds)) + (changed #t) + (last-sdat "not this")) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (> (- (current-seconds) start-time) 2)) + (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo")) + (ipaddr (car sdat)) + (port (cadr sdat)) + (servinf (conc servinfodir"/"ipaddr":"port))) + (set! servinfofile servinf) + (if (not (file-exists? servinfodir)) + (create-directory servinfodir #t)) + (with-output-to-file servinf + (lambda () + (let* ((serv-id (server:mk-signature))) + (set! *server-id* serv-id) + (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)) + (print "started: "(seconds->year-week/day-time (current-seconds)))))) + (set! *on-exit-procs* (cons + (lambda () + (delete-file* servinf)) + *on-exit-procs*)) + ;; put data about this server into a simple flat file host.port + (debug:print-info 0 *default-log-port* "Received server alive signature") + sdat) + (begin + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (sleep 4) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (if sdat + (let* ((ipaddr (car sdat)) + (port (cadr sdat)) + (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port))) + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (exit)) + (exit) + ) + (loop start-time + (equal? sdat last-sdat) + sdat))))))) + (iface (car server-info)) + (port (cadr server-info)) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (server-going #f) + (server-log-file (args:get-arg "-log"))) ;; always set when we are a server + + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) + (with-output-to-file started-file (lambda ()(print (current-process-id))))) + + (let loop ((count 0) + (server-state 'available) + (bad-sync-count 0) + (start-time (current-milliseconds))) + + ;; Use this opportunity to sync the tmp db to megatest.db + (if (not server-going) ;; *dbstruct-dbs* + (begin + (debug:print 0 *default-log-port* "SERVER: dbprep") + (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!! + (set! server-going #t) + (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. + (if (and no-sync-db + (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :) + (begin + (if (common:low-noise-print 120 "sync-all-print") + (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))) + (db:all-db-sync *dbstruct-dbs*) + ))) + + ;; when things go wrong we don't want to be doing the various queries too often + ;; so we strive to run this stuff only every four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) + + ;; Check that iface and port have not changed (can happen if server port collides) + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + + (if (not (equal? sdat (list iface port))) + (let ((new-iface (car sdat)) + (new-port (cadr sdat))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (set! iface new-iface) + (set! port new-port) + (if (not *server-id*) + (set! *server-id* (server:mk-signature))) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) + (flush-output *default-log-port*))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (mutex-lock! *heartbeat-mutex*) + (set! last-access *db-last-access*) + (mutex-unlock! *heartbeat-mutex*) + + (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) + (begin + (if (not *server-id*) + (set! *server-id* (server:mk-signature))) + (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) + (flush-output *default-log-port*))) + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (let ((curr-time (current-seconds))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn) + (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter + (not *server-overloaded*) + (file-exists? servinfofile)) + (change-file-times servinfofile curr-time curr-time))) + (if (and (common:low-noise-print 120 "start new server") + (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another + (begin + (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...") + (server:kind-run *toppath*) + (if (> *api-process-request-count* 100) + (begin + (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile) + (delete-file* servinfofile))))))) + (loop 0 server-state bad-sync-count (current-milliseconds))) + (else + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (http-transport:server-shutdown port))))))) + +(define (http-transport:server-shutdown port) + (begin + ;;(BB> "http-transport:server-shutdown called") + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; + ;; start_shutdown + ;; + (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up + (portlogger:open-run-close portlogger:set-port port "released") + (thread-sleep! 1) + + ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) + ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) + ;; (debug:print-info 0 *default-log-port* "Average cached write time " + ;; (if (eq? *number-of-writes* 0) + ;; "n/a (no writes)" + ;; (/ *writes-total-delay* + ;; *number-of-writes*)) + ;; " ms") + ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) + ;; (debug:print-info 0 *default-log-port* "Average non-cached time " + ;; (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + ;; " ms") + + (db:print-current-query-stats) + #;(common:save-pkt `((action . exit) + (T . server) + (pid . ,(current-process-id))) + *configdat* #t) + + ;; remove .servinfo file(s) here + + (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") + (exit))) + +;; all routes though here end in exit ... +;; +;; start_server? +;; +(define (http-transport:launch) + ;; check the .servinfo directory, are there other servers running on this + ;; or another host? + (let* ((server-start-is-ok (server:minimal-check *toppath*))) + (if (not server-start-is-ok) + (begin + (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.") + (exit 1)))) + + ;; check that a server start is in progress, pause or exit if so + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running) + "Keep running")))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit))) + +;; (define (http-transport:server-signal-handler signum) +;; (signal-mask! signum) +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* " ... exiting ...") +;; (let ((th1 (make-thread (lambda () +;; (thread-sleep! 1)) +;; "eat response")) +;; (th2 (make-thread (lambda () +;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") +;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff +;; (debug:print 0 *default-log-port* " Done.") +;; (exit 4)) +;; "exit on ^C timer"))) +;; (thread-start! th2) +;; (thread-start! th1) +;; (thread-join! th2)))) + +;;=============================================== +;; Java script +;;=============================================== +(define (http-transport:show-jquery) + (let* ((data (tests:readlines *java-script-lib*))) +(string-join data "\n"))) + + + +;;====================================================================== +;; web pages +;;====================================================================== + +(define (http-transport:html-test-log $) + (let* ((run-id ($ 'runid)) + (test-item ($ 'testname)) + (parts (string-split test-item ":")) + (test-name (car parts)) + + (item-name (if (equal? (length parts) 1) + "" + (cadr parts)))) + ;(print $) +(tests:get-test-log run-id test-name item-name))) + + +(define (http-transport:html-dboard $) + (let* ((page ($ 'page)) + (oup (open-output-string)) + (bdy "--------------------------") + + (ret (tests:dynamic-dboard page))) + (s:output-new oup ret) + (close-output-port oup) + + (set! bdy (get-output-string oup)) + (conc "

Dashboard

" bdy "

" ))) + +(define (http-transport:main-page) + (let ((linkpath (root-path))) + (conc "

" (pathname-strip-directory *toppath*) "

" + "" + "Run area: " *toppath* + "

Server Stats

" + (http-transport:stats-table) + "
" + (http-transport:runs linkpath) + "
" + ;; (http-transport:run-stats) + "" + ))) + +(define (http-transport:stats-table) + (mutex-lock! *heartbeat-mutex*) + (let ((res + (conc "" + ;; "" + "" + "" + "" + ;; "" + "" + "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) + "n/a (no writes)" + (/ *writes-total-delay* + *number-of-writes*)) + " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + " ms
Last access" (seconds->time-string *db-last-access*) "
"))) + (mutex-unlock! *heartbeat-mutex*) + res)) + +(define (http-transport:runs linkpath) + (conc "

Runs

" + (string-intersperse + (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) + (map (lambda (p) + (conc "" p "
")) + files)) + " "))) + +#;(define (http-transport:run-stats) + (let ((stats (open-run-close db:get-running-stats #f))) + (conc "" + (string-intersperse + (map (lambda (stat) + (conc "")) + stats) + " ") + "
" (car stat) "" (cadr stat) "
"))) +) ADDED attic/index-tree.scm Index: attic/index-tree.scm ================================================================== --- /dev/null +++ attic/index-tree.scm @@ -0,0 +1,61 @@ +;;====================================================================== +;; Copyright 2006-2013, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; +;;====================================================================== + +;;====================================================================== +;; Tests +;;====================================================================== + +(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit tests)) +(declare (uses lock-queue)) +(declare (uses db)) +(declare (uses common)) +(declare (uses items)) +(declare (uses runconfig)) +(declare (uses commonmod)) +(import commonmod) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") +(include "test_records.scm") + +;; Populate the links tree with index.html files +;; +;; - start from most recent tests and work towards oldest -OR- +;; start from deepest hierarchy and work way up +;; - look up tests in megatest.db +;; - cross-reference the tests to stats.db +;; - if newer than event_time in stats.db or not registered in stats.db regenerate +;; - run du and store in stats.db +;; - when all tests at that level done generate next level up index.html +;; +;; include in rollup html index.html: +;; sum of du +;; counts of PASS, FAIL, RUNNING, REMOTEHOSTSTART, LAUNCHED, CHECK etc. +;; overall status +;; +;; include in test specific index.html: +;; host, uname, cpu graph, disk avail graph, steps, data +;; meta data, state, status, du +;; ADDED attic/lock-queue.scm Index: attic/lock-queue.scm ================================================================== --- /dev/null +++ attic/lock-queue.scm @@ -0,0 +1,258 @@ +;; Copyright 2006-2013, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +(use (prefix sqlite3 sqlite3:) srfi-18) + +(declare (unit lock-queue)) +(declare (uses common)) +(declare (uses debugprint)) +(declare (uses tasks)) +(declare (uses commonmod)) + +(import commonmod + debugprint) + +;;====================================================================== +;; attempt to prevent overlapping updates of rollup files by queueing +;; update requests in an sqlite db +;;====================================================================== + +;;====================================================================== +;; db record, +;;====================================================================== + +(define (make-lock-queue:db-dat)(make-vector 3)) +(define-inline (lock-queue:db-dat-get-db vec) (vector-ref vec 0)) +(define-inline (lock-queue:db-dat-get-path vec) (vector-ref vec 1)) +(define-inline (lock-queue:db-dat-set-db! vec val)(vector-set! vec 0 val)) +(define-inline (lock-queue:db-dat-set-path! vec val)(vector-set! vec 1 val)) + +(define (lock-queue:delete-lock-db dbdat) + (let ((fname (lock-queue:db-dat-get-path dbdat))) + (system (conc "rm -f " fname "*")))) + +(define (lock-queue:open-db fname #!key (count 10)) + (let* ((actualfname (conc fname ".lockdb")) + (dbexists (common:file-exists? actualfname)) + (db (sqlite3:open-database actualfname)) + (handler (make-busy-timeout 136000))) + (if dbexists + (vector db actualfname) + (begin + (handle-exceptions + exn + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:open-db fname count: (- count 1)) + (vector db actualfname))) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS queue ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + start_time INTEGER, + state TEXT, + CONSTRAINT queue_constraint UNIQUE (test_id));") + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS runlocks ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + run_lock TEXT, + CONSTRAINT runlock_constraint UNIQUE (run_lock));")))))) + (sqlite3:set-busy-handler! db handler) + (vector db actualfname))) + +(define (lock-queue:set-state dbdat test-id newstate #!key (remtries 10)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 30) + (lock-queue:set-state dbdat test-id newstate remtries: (- remtries 1))) + (begin + (debug:print-error 0 *default-log-port* " Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") + #f)) + (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "UPDATE queue SET state=? WHERE test_id=?;" + newstate + test-id))) + +(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10)) + ;; no need to wait on journal on read only queries + ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:any-younger. Removing lockdb and trying again in 5 seconds.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 5) + (lock-queue:delete-lock-db dbdat) + (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1))) + (begin + (debug:print-error 0 *default-log-port* " Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") + #f)) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (tid) + ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as + (if (not (equal? tid test-id)) + (set! res tid))) + (lock-queue:db-dat-get-db dbdat) + "SELECT test_id FROM queue WHERE start_time > ?;" mystart) + res))) + +(define (lock-queue:get-lock dbdat test-id #!key (count 10)(waiting-msg #f)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 remove: #t waiting-msg: "lock-queue:get-lock, waiting on journal") + (let* ((res #f) + (db (lock-queue:db-dat-get-db dbdat)) + (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) + (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) + (let ((result + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 10) + ;; (if (> count 0) + ;; #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries + ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained + (lock-queue:delete-lock-db dbdat) + #f) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tid lockstate) + (set! res (list tid lockstate))) + lckqry) + (if res + (if (equal? (car res) test-id) + #t ;; already have the lock + #f) + (begin + (sqlite3:execute mklckqry test-id) + ;; if no error handled then return #t for got the lock + #t))))))) + (sqlite3:finalize! lckqry) + (sqlite3:finalize! mklckqry) + result))) + +(define (lock-queue:release-lock fname test-id #!key (count 10)) + (let* ((dbdat (lock-queue:open-db fname))) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:release-lock; waiting on journal") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to release queue lock. Will try again in few seconds") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! (/ count 10)) + (if (> count 0) + (begin + (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)) + (lock-queue:release-lock fname test-id count: (- count 1))) + (let ((journal (conc fname "-journal"))) + ;; If we've tried ten times and failed there is a serious problem + ;; try to remove the lock db and allow it to be recreated + (handle-exceptions + exn + #f + (if (common:file-exists? journal)(delete-file journal)) + (if (common:file-exists? fname) (delete-file fname)) + #f)))) + (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id) + (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))))) + +(define (lock-queue:steal-lock dbdat test-id #!key (count 10)) + (debug:print-info 0 *default-log-port* "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to steal queue lock. Will try again in few seconds") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 10) + (if (> count 0) + (lock-queue:steal-lock dbdat test-id count: (- count 1)) + #f)) + (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE run_lock='locked';")) + (lock-queue:get-lock dbdat test-it)) + +;; returns #f if ok to skip the task +;; returns #t if ok to proceed with task +;; otherwise waits +;; +(define (lock-queue:wait-turn fname test-id #!key (count 10)(waiting-msg #f)) + (let* ((dbdat (lock-queue:open-db fname)) + (mystart (current-seconds)) + (db (lock-queue:db-dat-get-db dbdat))) + ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + (thread-sleep! 10) + (if (> count 0) + (begin + (sqlite3:finalize! db) + (lock-queue:wait-turn fname test-id count: (- count 1))) + (begin + (debug:print 0 *default-log-port* "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain") + (print-call-chain (current-error-port)) + #f))) + ;; wait 10 seconds and then check to see if someone is already updating the html + (thread-sleep! 10) + (if (not (lock-queue:any-younger? dbdat mystart test-id)) ;; no processing in flight, must try to start processing + (begin + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") + (sqlite3:execute + db + "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" + test-id mystart) + ;; (thread-sleep! 1) ;; give other tests a chance to register + (let ((result + (let loop ((younger-waiting (lock-queue:any-younger? dbdat mystart test-id))) + (if younger-waiting + (begin + ;; no need for us to wait. mark in the lock queue db as skipping + ;; no point in marking anything in the queue - simply never register this + ;; test as it is *covered* by a previously started update to the html file + ;; (lock-queue:set-state dbdat test-id "skipping") + #f) ;; let the calling process know that nothing needs to be done + (if (lock-queue:get-lock dbdat test-id) + #t + (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock + (lock-queue:steal-lock dbdat test-id) + (begin + (thread-sleep! 1) + (loop (lock-queue:any-younger? dbdat mystart test-id))))))))) + (sqlite3:finalize! db) + result)))))) + + +;; (use trace) +;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state) ADDED attic/mlaunch.scm Index: attic/mlaunch.scm ================================================================== --- /dev/null +++ attic/mlaunch.scm @@ -0,0 +1,35 @@ +;; Copyright 2006-2014, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + +;;====================================================================== +;; MLAUNCH +;; +;; take jobs from the given queue and keep launching them keeping +;; the cpu load at the targeted level +;; +;;====================================================================== + +(use sqlite3 srfi-1 posix regex regex-case srfi-69 format) + +(declare (unit mlaunch)) +(declare (uses db)) +(declare (uses common)) +(declare (uses commonmod)) +(import commonmod) + ADDED attic/portlogger-example.scm Index: attic/portlogger-example.scm ================================================================== --- /dev/null +++ attic/portlogger-example.scm @@ -0,0 +1,21 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + + +(declare (uses portlogger)) + +(print (apply portlogger:main (cdr (argv)))) ADDED attic/synchash.scm Index: attic/synchash.scm ================================================================== --- /dev/null +++ attic/synchash.scm @@ -0,0 +1,137 @@ +;;====================================================================== +;; Copyright 2006-2012, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; +;;====================================================================== + +;;====================================================================== +;; A hash of hashes that can be kept in sync by sending minial deltas +;;====================================================================== + +(use format) +(use srfi-1 srfi-69 sqlite3) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit synchash)) +(declare (uses db)) +(declare (uses server)) +(declare (uses rmtmod)) + +(include "db_records.scm") + +(import rmtmod) + +(define (synchash:make) + (make-hash-table)) + +;; given an alist of objects '((id obj) ...) +;; 1. remove unchanged objects from the list +;; 2. create a list of removed objects by id +;; 3. remove removed objects from synchash +;; 4. replace or add new or changed objects to synchash +;; +(define (synchash:get-delta indat synchash) + (let ((deleted '()) + (changed '()) + (found '()) + (orig-keys (hash-table-keys synchash))) + (for-each + (lambda (item) + (let* ((id (car item)) + (dat (cadr item)) + (ref (hash-table-ref/default synchash id #f))) + (if (not (equal? dat ref)) ;; item changed or new + (begin + (set! changed (cons item changed)) + (hash-table-set! synchash id dat))) + (set! found (cons id found)))) + indat) + (for-each + (lambda (id) + (if (not (member id found)) + (begin + (set! deleted (cons id deleted)) + (hash-table-delete! synchash id)))) + orig-keys) + (list changed deleted) + ;; (list indat '()) ;; just for debugging + )) + +;; keynum => the field to use as the unique key (usually 0 but can be other field) +;; +(define (synchash:client-get proc synckey keynum synchash run-id . params) + (let* ((data (rmt:synchash-get run-id proc synckey keynum params)) + (newdat (car data)) + (removs (cadr data)) + (myhash (hash-table-ref/default synchash synckey #f))) + (if (not myhash) + (begin + (set! myhash (make-hash-table)) + (hash-table-set! synchash synckey myhash))) + (for-each + (lambda (item) + (let ((id (car item)) + (dat (cadr item))) + ;; (debug:print-info 2 *default-log-port* "Processing item: " item) + (hash-table-set! myhash id dat))) + newdat) + (for-each + (lambda (id) + (hash-table-delete! myhash id)) + removs) + ;; WHICH ONE!? + ;; data)) ;; return the changed and deleted list + (list newdat removs))) ;; synchash)) + +(define *synchashes* (make-hash-table)) + +(define (synchash:server-get dbstruct run-id proc synckey keynum params) + ;; (debug:print-info 2 *default-log-port* "synckey: " synckey ", keynum: " keynum ", params: " params) + (let* ((dbdat (db:get-db dbstruct run-id)) + (db (db:dbdat-get-db dbdat)) + (synchash (hash-table-ref/default *synchashes* synckey #f)) + (newdat (apply (case proc + ((db:get-runs) db:get-runs) + ((db:get-tests-for-run-mindata) db:get-tests-for-run-mindata) + ((db:get-test-info-by-ids) db:get-test-info-by-ids) + (else + (print "ERROR: sync for hash " proc " not setup! Edits needed in synchash.scm") + print)) + db params)) + (postdat #f) + (make-indexed (lambda (x) + (list (vector-ref x keynum) x)))) + ;; Now process newdat based on the query type + (set! postdat (case proc + ((db:get-runs) + ;; (debug:print-info 2 *default-log-port* "Get runs call") + (let ((header (vector-ref newdat 0)) + (data (vector-ref newdat 1))) + ;; (debug:print-info 2 *default-log-port* "header: " header ", data: " data) + (cons (list "header" header) ;; add the header keyed by the word "header" + (map make-indexed data)))) ;; add each element keyed by the keynum'th val + (else + ;; (debug:print-info 2 *default-log-port* "Non-get runs call") + (map make-indexed newdat)))) + ;; (debug:print-info 2 *default-log-port* "postdat: " postdat) + ;; (if (not indb)(sqlite3:finalize! db)) + (if (not synchash) + (begin + (set! synchash (make-hash-table)) + (hash-table-set! *synchashes* synckey synchash))) + (synchash:get-delta postdat synchash))) + DELETED client.scm Index: client.scm ================================================================== --- client.scm +++ /dev/null @@ -1,128 +0,0 @@ - -;; Copyright 2006-2012, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - -;;====================================================================== -;; C L I E N T S -;;====================================================================== - -(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5 - message-digest matchable spiffy uri-common intarweb http-client - spiffy-request-vars uri-common intarweb directory-utils) - -(declare (unit client)) - -(declare (uses common)) -(declare (uses db)) -(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. - -(include "common_records.scm") -(include "db_records.scm") - -;; client:get-signature -(define (client:get-signature) - (if *my-client-signature* *my-client-signature* - (let ((sig (conc (get-host-name) " " (current-process-id)))) - (set! *my-client-signature* sig) - *my-client-signature*))) - -;; Not currently used! But, I think it *should* be used!!! -#;(define (client:logout serverdat) - (let ((ok (and (socket? serverdat) - (cdb:logout serverdat *toppath* (client:get-signature))))) - ok)) - -(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) - (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) - -;; Do all the connection work, look up the transport type and set up the -;; connection if required. -;; -;; There are two scenarios. -;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline -;; 2. We are a run tests, list runs or other interactive process and we must figure out -;; *transport-type* and *runremote* from the monitor.db -;; -;; client:setup -;; -;; lookup_server, need to remove *runremote* stuff -;; - -(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) - (mutex-lock! *rmt-mutex*) - (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) - (mutex-unlock! *rmt-mutex*) - res)) - -(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) - (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) - (server:start-and-wait areapath) - (if (<= remaining-tries 0) - (begin - (debug:print-error 0 *default-log-port* "failed to start or connect to server") - (exit 1)) - ;; - ;; Alternatively here, we can get the list of candidate servers and work our way - ;; through them searching for a good one. - ;; - (let* ((server-dat (server:choose-server areapath 'best)) - (runremote (or area-dat *runremote*))) - (if (not server-dat) ;; no server found - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) - (match server-dat - ((host port start-time server-id pid) - (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if (and (not area-dat) - (not *runremote*)) - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))))) - (if (and host port server-id) - (let* ((start-res (http-transport:client-connect host port server-id)) - (ping-res (rmt:login-no-auto-client-setup start-res))) - (if (and start-res - ping-res) - (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago - (if runremote - (begin - (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) - (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) - start-res) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))) - (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 - (case *transport-type* - ((http)(http-transport:close-connections))) - (if *runremote* - (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) - ) - (thread-sleep! 1) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) - ))) - (begin ;; no server registered - ;; (server:kind-run areapath) - (server:start-and-wait areapath) - (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) - (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))) - (else - (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat))))))) - Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -15,23 +15,32 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== + +(declare (unit common)) +(declare (uses commonmod)) +(declare (uses rmtmod)) +(declare (uses debugprint)) +(declare (uses mtargs)) (use srfi-1 data-structures posix regex-case (prefix base64 base64:) format dot-locking csv-xml z3 udp ;; sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack matchable regex posix (srfi 18) extras ;; tcp (prefix nanomsg nmsg:) (prefix sqlite3 sqlite3:) pkts (prefix dbi dbi:) ) +(use posix-extras pathname-expand files) -(declare (unit common)) -(declare (uses commonmod)) -(import commonmod) + +(import commonmod + debugprint + rmtmod + (prefix mtargs args:)) (include "common_records.scm") ;; (require-library margs) @@ -47,12 +56,14 @@ (define (stop-the-train) (thread-start! (make-thread (lambda () (let loop () (if (and *toppath* (file-exists? (conc *toppath*"/stop-the-train"))) - (begin - (debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately") + (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) + ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think + (print msg) + (debug:print 0 *default-log-port* msg) (exit 1))) (thread-sleep! 5) (loop)))))) ;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . @@ -161,19 +172,16 @@ ;; (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* ;; task db (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access ;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile -(define *db-transaction-mutex* (make-mutex)) +;; (define *db-transaction-mutex* (make-mutex)) (define *db-cache-path* #f) ;; (define *db-with-db-mutex* (make-mutex)) (define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) -;; no sync db -;; (define *no-sync-db* #f) ;; moved to dbfile ;; SERVER -(define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg (define *runremote* #f) ;; if set up for server communication this will hold ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *server-id* #f) @@ -210,12 +218,10 @@ ;; Miscellaneous (define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers (define *numcpus-cache* (make-hash-table)) -(use posix-extras pathname-expand files) - ;; this plugs a hole in posix-extras in recent chicken versions > 4.9) (let-values (( (chicken-release-number chicken-major-version) (apply values (map string->number (take @@ -246,36 +252,13 @@ (define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) (define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) (define (common:get-sync-lock-filepath) (let* ((tmp-area (common:get-db-tmp-area)) - (lockfile (conc tmp-area "/megatest.db.sync-lock"))) + (lockfile (conc tmp-area "/megatest.db.lock"))) lockfile)) -;;====================================================================== -;; when called from a wrapper I need sometimes to find the calling -;; wrapper, this is for dashboard to find the correct megatest. -;; -(define (common:find-local-megatest #!optional (progname "megatest")) - (let ((res (filter file-exists? - (map (lambda (updir) - (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) (conc updir progname)) - ((mtest) (conc updir progname)) - ((dashboard) progname) - (else exe))))) - '("../../" "../"))))) - (if (null? res) - (begin - (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") - progname) - (car res)))) - (define *common:logpro-exit-code->status-sym-alist* '( ( 0 . pass ) ( 1 . fail ) ( 2 . warn ) ( 3 . check ) @@ -315,26 +298,39 @@ (else "FAIL"))) (define (common:logpro-exit-code->test-status exit-code) (status-sym->string (common:logpro-exit-code->status-sym exit-code))) +;; (defstruct remote + + ;; transport to be used + ;; http - use http-transport + ;; http-read-cached - use http-transport for writes but in-mem cached for reads + (rmode 'http) (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) (cons #f #f)))) (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) res)) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) - (server-info (if *toppath* (server:check-if-running *toppath*) #f)) + (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive - (connect-time (current-seconds)) - (conndat #f) - (transport *transport-type*) + (connect-time (current-seconds)) ;; when we first connected + (last-access (current-seconds)) ;; last time we talked to server + ;; (conndat #f) ;; iface port api-uri api-url api-req seconds server-id (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode + (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode + + ;; conndat stuff + (iface #f) ;; TODO: Consolidate this data with server-url and server-info above + (port #f) + (api-url #f) + (api-uri #f) + (api-req #f)) ;; launching and hosts (defstruct host (reachable #f) (last-update 0) @@ -408,27 +404,48 @@ (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) + +;; From 1.70 to 1.80, db's are compatible. + (define (common:api-changed?) - (not (equal? (substring (->string megatest-version) 0 4) - (substring (conc (common:get-last-run-version)) 0 4)))) + (let* ( + (megatest-major-version (substring (->string megatest-version) 0 4)) + (run-major-version (substring (conc (common:get-last-run-version)) 0 4)) + ) + (and (not (equal? megatest-major-version "1.80")) + (not (equal? megatest-major-version megatest-run-version))) + ) +) ;;====================================================================== ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; (define (common:cleanup-db dbstruct #!key (full #f)) - (apply db:multi-db-sync - dbstruct - 'schema - 'killservers - 'adj-target - 'new2old - '(dejunk) - ) + (case (rmt:transport-mode) + ((http) + (apply db:multi-db-sync + dbstruct + 'schema + 'killservers + 'adj-target + 'new2old + '(dejunk) + )) + ((tcp nfs) + (debug:print 0 *default-log-port* "WARNING: cleanup-db NOT implemented yet for tcp and nfs.") + #;(apply db:multi-db-sync + dbstruct + 'schema + 'killservers + 'adj-target + 'new2old + '(dejunk) + ))) (if (common:api-changed?) (common:set-last-run-version))) (define (common:snapshot-file filepath #!key (subdir ".") ) (if (file-exists? filepath) @@ -520,11 +537,11 @@ (define (common:rotate-logs) (let* ((all-files (make-hash-table)) (stats (make-hash-table)) (inc-stat (lambda (key) (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) - (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age + (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) (handle-exceptions exn @@ -599,14 +616,15 @@ ;;====================================================================== ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; Do NOT check if not on homehost! ;; (define (common:exit-on-version-changed) - (if (common:on-homehost?) + (if (and *toppath* ;; do nothing if *toppath* not yet provided + (common:on-homehost?)) (if (common:api-changed?) (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) + (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".mtdb/main.db")) (read-only (not (file-write-access? dbfile))) (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) (debug:print 0 *default-log-port* "WARNING: Version mismatch!\n" " expected: " (common:version-signature) "\n" @@ -626,14 +644,14 @@ (common:cleanup-db dbstruct))) ((not (common:file-exists? mtconf)) (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") (exit 1)) ((not (common:file-exists? dbfile)) - (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") + (debug:print 0 *default-log-port* " .mtdb/main.db does not exist in this area. Cannot proceed with megatest version migration.") (exit 1)) ((not (eq? (current-user-id)(file-owner mtconf))) - (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") + (debug:print 0 *default-log-port* " You do not own .mtdb/main.db in this area. Cannot proceed with megatest version migration.") (exit 1)) (read-only (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") (exit 1)) (else @@ -710,22 +728,21 @@ "")))) (define (common:alist-ref/default key alist default) (or (alist-ref key alist) default)) -(define (common:low-noise-print waitval . keys) - (let* ((key (string-intersperse (map conc keys) "-" )) - (lasttime (hash-table-ref/default *common:denoise* key 0)) - (currtime (current-seconds))) - (if (> (- currtime lasttime) waitval) - (begin - (hash-table-set! *common:denoise* key currtime) - #t) - #f))) - -(define (common:get-megatest-exe) - (or (getenv "MT_MEGATEST") "megatest")) +;; moved into commonmod +;; +;; (define (common:low-noise-print waitval . keys) +;; (let* ((key (string-intersperse (map conc keys) "-" )) +;; (lasttime (hash-table-ref/default *common:denoise* key 0)) +;; (currtime (current-seconds))) +;; (if (> (- currtime lasttime) waitval) +;; (begin +;; (hash-table-set! *common:denoise* key currtime) +;; #t) +;; #f))) (define (common:read-encoded-string instr) (handle-exceptions exn (handle-exceptions @@ -946,49 +963,45 @@ (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) (exit 1)) - (let* ((tsname (common:get-testsuite-name)) + (let* ((toppath (common:real-path *toppath*)) + (tsname (common:get-testsuite-name)) (dbpath (common:get-create-writeable-dir (list (conc "/tmp/" (current-user-name) "/megatest_localdb/" tsname "/" - (string-translate *toppath* "/" ".")) + (string-translate toppath "/" ".")) (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name "/megatest_localdb/" tsname - (string-translate *toppath* "/" ".")) + (string-translate toppath "/" ".")) )))) (set! *db-cache-path* dbpath) - ;; ensure megatest area has .megatest - (let ((dbarea (conc *toppath* "/.megatest"))) + ;; ensure megatest area has .mtdb + (let ((dbarea (conc *toppath* "/.mtdb"))) (if (not (file-exists? dbarea)) (create-directory dbarea))) - ;; ensure tmp area has .megatest - (let ((dbarea (conc dbpath "/.megatest"))) + ;; ensure tmp area has .mtdb + (let ((dbarea (conc dbpath "/.mtdb"))) (if (not (file-exists? dbarea)) (create-directory dbarea))) dbpath)) #f))) (define (common:get-area-path-signature) (message-digest-string (md5-primitive) *toppath*)) -(define (common:get-signature str) - (message-digest-string (md5-primitive) str)) - ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (common:run-sync?) - (and (common:on-homehost?) - (args:get-arg "-server"))) - -(define (common:human-time) - (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) + (and *toppath* ;; gate if called before *toppath* is set + (common:on-homehost?) + (args:get-arg "-server"))) (define (std-signal-handler signum) ;; (signal-mask! signum) (set! *time-to-exit* #t) @@ -1048,26 +1061,10 @@ (define (common:get-disks #!key (configf #f)) (hash-table-ref/default (or configf (read-config "megatest.config" #f #t)) "disks" '("none" ""))) -;;====================================================================== -;; return first command that exists, else #f -;; -(define (common:which cmds) - (if (null? cmds) - #f - (let loop ((hed (car cmds)) - (tal (cdr cmds))) - (let ((res (with-input-from-pipe (conc "which " hed) read-line))) - (if (and (string? res) - (common:file-exists? res)) - res - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - (define (common:get-install-area) (let ((exe-path (car (argv)))) (if (common:file-exists? exe-path) (handle-exceptions exn @@ -1345,11 +1342,11 @@ (else (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") #t)))) ;; default to requiring server (if force-result (begin - (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".") + (debug:print-info 0 *default-log-port* "ATTENTION! Forcing use of server, force setting is \"" force-setting "\".") #t) #f))) ;;====================================================================== ;; M I S C L I S T S @@ -1598,10 +1595,30 @@ path) ;; just give up (with-input-from-pipe (conc "/bin/readlink -f " path) (lambda () (read-line))))) + +;; for reasons I don't understand multiple calls to real-path in parallel threads +;; must be protected by mutexes +;; +(define (common:real-path inpath) + ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) + ;; (let-values + ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) + ;; (with-input-from-port inp + ;; (let loop ((inl (read-line)) + ;; (res #f)) + ;; (print "inl=" inl) + ;; (if (eof-object? inl) + ;; (begin + ;; (close-input-port inp) + ;; (close-output-port oup) + ;; ;; (process-wait pid) + ;; res) + ;; (loop (read-line) inl)))))) + (with-input-from-pipe (conc "readlink -f " inpath) read-line)) ;;====================================================================== ;; returns *effective load* (not normalized) ;; (define (common:get-intercept onemin fivemin) @@ -1797,25 +1814,22 @@ (> (length res) 2)) res) ((eq? res #f) default) ;; add messages? ((eq? res #f) default) ;; this would be the #eof (else default)))) + +(define (common:ssh-get-loadavg remote-host) + (let ((inp (open-input-pipe (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")))) + (let* ((res (read-lines inp))) + (close-input-pipe inp) + res))) (define (common:get-normalized-cpu-load-raw remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost (or (common:get-cached-info actual-host "normalized-load") (let ((data (if remote-host - (let ((inp #f)) - (handle-exceptions - exn - (begin - (close-input-port inp) - '()) - (set! inp (open-input-port (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\""))) - (let* ((res (read-lines inp))) - (close-input-port inp) - res))) + (common:ssh-get-loadavg remote-host) (append (with-input-from-file "/proc/loadavg" read-lines) (with-input-from-file "/proc/cpuinfo" read-lines) @@ -1989,16 +2003,27 @@ (host-last-used-set! rec curr-time) new-best) (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) (define (common:wait-for-homehost-load maxnormload msg) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (server:choose-server *toppath* 'homehost))) - (hh (if hh-dat (car hh-dat) #f))) - (common:wait-for-normalized-load maxnormload msg hh))) - + (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test... + (if (not *toppath*) + (begin + (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") + (thread-sleep! 30) + (if (< (- (current-seconds) start-time) 300) + (loop start-time))))) + (case (rmt:transport-mode) + ((http) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (server:choose-server *toppath* 'homehost))) + (hh (if hh-dat (car hh-dat) #f))) + (common:wait-for-normalized-load maxnormload msg hh))) + (else + (common:wait-for-normalized-load maxnormload msg (get-host-name))))) + (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; hosts had better not be changing the number of cpus too often! (or (hash-table-ref/default *numcpus-cache* actual-host #f) (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) @@ -2023,27 +2048,10 @@ (common:write-cached-info actual-host "num-cpus" result)) result)))) (hash-table-set! *numcpus-cache* actual-host numcpus) numcpus)))) -(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f)) - (let ((inp #f)) - (handle-exceptions - exn - (begin - (close-input-port inp) - (if msg-proc - (msg-proc) - (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn)) - default) - (set! inp (open-input-pipe ssh-command)) - (with-input-from-port inp - (lambda () - (let ((res (proc))) - (close-input-port inp) - res)))))) - ;;====================================================================== ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) (let ((num-cpus (common:get-num-cpus remote-host))) @@ -2218,30 +2226,10 @@ (uname #f)) (if (null? (car uname-res)) "unknown" (caar uname-res)))) -;; for reasons I don't understand multiple calls to real-path in parallel threads -;; must be protected by mutexes -;; -(define (common:real-path inpath) - ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) - ;; (let-values - ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) - ;; (with-input-from-port inp - ;; (let loop ((inl (read-line)) - ;; (res #f)) - ;; (print "inl=" inl) - ;; (if (eof-object? inl) - ;; (begin - ;; (close-input-port inp) - ;; (close-output-port oup) - ;; ;; (process-wait pid) - ;; res) - ;; (loop (read-line) inl)))))) - (with-input-from-pipe (conc "readlink -f " inpath) read-line)) - ;;====================================================================== ;; D I S K S P A C E ;;====================================================================== (define (common:get-disk-space-used fpath) @@ -2619,291 +2607,10 @@ (cond (with-vars (common:without-vars fullcmd)) (with-orig-env (common:with-orig-env fullcmd)) (else (common:without-vars fullcmd "MT_.*"))))) -;;====================================================================== -;; T I M E A N D D A T E -;;====================================================================== - -;;====================================================================== -;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 -(define (common:hms-string->seconds tstr) - (let ((parts (string-split-fields "\\w+" tstr)) - (time-secs 0) - ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks - (trx (regexp "(\\d+)([smhdMyw])"))) - (for-each (lambda (part) - (let ((match (string-match trx part))) - (if match - (let ((val (string->number (cadr match))) - (unt (caddr match))) - (if val - (set! time-secs (+ time-secs (* val - (case (string->symbol unt) - ((s) 1) - ((m) 60) ;; minutes - ((h) 3600) - ((d) 86400) - ((w) 604800) - ((M) 2628000) ;; aproximately one month - ((y) 31536000) - (else #f)))))))))) - parts) - time-secs)) - -(define (seconds->hr-min-sec secs) - (let* ((hrs (quotient secs 3600)) - (min (quotient (- secs (* hrs 3600)) 60)) - (sec (- secs (* hrs 3600)(* min 60)))) - (conc (if (> hrs 0)(conc hrs "hr ") "") - (if (> min 0)(conc min "m ") "") - sec "s"))) - -(define (seconds->time-string sec) - (time->string - (seconds->local-time sec) "%H:%M:%S")) - -(define (seconds->work-week/day-time sec) - (time->string - (seconds->local-time sec) "ww%V.%u %H:%M")) - -(define (seconds->work-week/day sec) - (time->string - (seconds->local-time sec) "ww%V.%u")) - -(define (seconds->year-work-week/day sec) - (time->string - (seconds->local-time sec) "%yww%V.%w")) - -(define (seconds->year-work-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yww%V.%w %H:%M")) - -(define (seconds->year-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yw%V.%w %H:%M")) - -(define (seconds->quarter sec) - (case (string->number - (time->string - (seconds->local-time sec) - "%m")) - ((1 2 3) 1) - ((4 5 6) 2) - ((7 8 9) 3) - ((10 11 12) 4) - (else #f))) - -;;====================================================================== -;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch -;; -(define (common:date-time->seconds datetime) - (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) - -;;====================================================================== -;; given span of seconds tstart to tend -;; find start time to mark and mark delta -;; -(define (common:find-start-mark-and-mark-delta tstart tend) - (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... - (result #f) - (min 60) - (hr (* 60 60)) - (day (* 24 hr)) - (yr (* 365 day)) ;; year - (mo (/ yr 12)) - (wk (* day 7))) - (for-each - (lambda (max-blks) - (for-each - (lambda (span) ;; 5 2 1 - (if (not result) - (for-each - (lambda (timeunit timesym) ;; year month day hr min sec - (if (not result) - (let* ((time-blk (* span timeunit)) - (num-blks (quotient deltat time-blk))) - (if (and (> num-blks 4)(< num-blks max-blks)) - (let ((first (* (quotient tstart time-blk) time-blk))) - (set! result (list span timeunit time-blk first timesym)) - ))))) - (list yr mo wk day hr min 1) - '( y mo w d h m s)))) - (list 8 6 5 2 1))) - '(5 10 15 20 30 40 50 500)) - (if values - (apply values result) - (values 0 day 1 0 'd)))) - -;;====================================================================== -;; given x y lim return the cron expansion -;; -(define (common:expand-cron-slash x y lim) - (let loop ((curr x) - (res `())) - (if (< curr lim) - (loop (+ curr y) (cons curr res)) - (reverse res)))) - -;;====================================================================== -;; expand a complex cron string to a list of cron strings -;; -;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c -;; -;; NOTE: with flatten a lot of the crud below can be factored down. -;; -(define (common:cron-expand cron-str) - (if (list? cron-str) - (flatten - (fold (lambda (x res) - (if (list? x) - (let ((newres (map common:cron-expand x))) - (append x newres)) - (cons x res))) - '() - cron-str)) ;; (map common:cron-expand cron-str)) - (let ((cron-items (string-split cron-str)) - (slash-rx (regexp "(\\d+)/(\\d+)")) - (comma-rx (regexp ".*,.*")) - (max-vals '((min . 60) - (hour . 24) - (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations - (month . 12) - (dayofweek . 7)))) - (if (< (length cron-items) 5) ;; bad spec - cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it - (let loop ((hed (car cron-items)) - (tal (cdr cron-items)) - (type 'min) - (type-tal '(hour dayofmonth month dayofweek)) - (res '())) - (regex-case - hed - (slash-rx ( _ base incr ) (let* ((basen (string->number base)) - (incrn (string->number incr)) - (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) - (new-list-crons (fold (lambda (x myres) - (cons (conc (if (null? res) - "" - (conc (string-intersperse res " ") " ")) - x " " (string-intersperse tal " ")) - myres)) - '() expanded-vals))) - ;; (print "new-list-crons: " new-list-crons) - ;; (fold (lambda (x res) - ;; (if (list? x) - ;; (let ((newres (map common:cron-expand x))) - ;; (append x newres)) - ;; (cons x res))) - ;; '() - (flatten (map common:cron-expand new-list-crons)))) - ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) - (else (if (null? tal) - cron-str - (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) - -;;====================================================================== -;; given a cron string and the last time event was processed return #t to run or #f to not run -;; -;; min hour dayofmonth month dayofweek -;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 -;; -;; #t => yes, run the job -;; #f => no, do not run the job -;; -(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. - (let* ((cron-items (map string->number (string-split cron-str))) - (now-seconds (or now-seconds-in (current-seconds))) - (now-time (seconds->local-time now-seconds)) - (last-done-time (seconds->local-time last-done)) - (all-times (make-hash-table))) - ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) - (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings - #f - (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) - cron-items) - ;; 0 1 2 3 4 5 6 - ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) - (vector->list now-time)) - ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) - (vector->list last-done-time))) - ;; create all possible time slots - ;; remove invalid slots due to (for example) day of week - ;; get the start and end entries for the ref-seconds (current) time - ;; if last-done > ref-seconds => this is an ERROR! - ;; does the last-done time fall in the legit region? - ;; yes => #f do not run again this command - ;; no => #t ok to run the command - (for-each ;; month - (lambda (month) - (for-each ;; dayofmonth - (lambda (dom) - (for-each - (lambda (hr) ;; hour - (for-each - (lambda (minute) ;; minute - (let ((copy-now (apply vector (vector->list now-time)))) - (vector-set! copy-now 0 0) ;; force seconds to zero - (vector-set! copy-now 1 minute) - (vector-set! copy-now 2 hr) - (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced - (vector-set! copy-now 4 month) - (let* ((copy-now-secs (local-time->seconds copy-now)) - (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector - (if (or (not cdayofweek) - (equal? (vector-ref new-copy 6) - cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified - (if (or (not cdayofmonth) - (equal? (vector-ref new-copy 3) - (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified - (hash-table-set! all-times copy-now-secs new-copy)))))) - (if cmin - `(,cmin) ;; if given cmin, have to use it - (list (- nmin 1) nmin (+ nmin 1))))) ;; minute - (if chour - `(,chour) - (list (- nhour 1) nhour (+ nhour 1))))) ;; hour - (if cdayofmonth - `(,cdayofmonth) - (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) - (if cmonth - `(,cmonth) - (list (- nmonth 1) nmonth (+ nmonth 1)))) - (let ((before #f) - (is-in #f)) - (for-each - (lambda (moment) - (if (and before - (<= before now-seconds) - (>= moment now-seconds)) - (begin - ;; (print) - ;; (print "Before: " (time->string (seconds->local-time before))) - ;; (print "Now: " (time->string (seconds->local-time now-seconds))) - ;; (print "After: " (time->string (seconds->local-time moment))) - ;; (print "Last: " (time->string (seconds->local-time last-done))) - (if (< last-done before) - (set! is-in before)) - )) - (set! before moment)) - (sort (hash-table-keys all-times) <)) - is-in))))) - -(define (common:extended-cron cron-str now-seconds-in last-done) - (let ((expanded-cron (common:cron-expand cron-str))) - (if (string? expanded-cron) - (common:cron-event expanded-cron now-seconds-in last-done) - (let loop ((hed (car expanded-cron)) - (tal (cdr expanded-cron))) - (if (common:cron-event hed now-seconds-in last-done) - #t - (if (null? tal) - #f - (loop (car tal)(cdr tal)))))))) - ;;====================================================================== ;; C O L O R S ;;====================================================================== (define (common:name->iup-color name) @@ -3139,49 +2846,10 @@ (if (null? tal) fallback-launcher (loop (car tal)(cdr tal)))))))) fallback-launcher))) -;;====================================================================== -;; NMSG AND NEW API -;;====================================================================== -;; -;; ;;====================================================================== -;; ;; nm based server experiment, keep around for now. -;; ;; -;; (define (nm:start-server dbconn #!key (given-host-name #f)) -;; (let* ((srvdat (start-raw-server given-host-name: given-host-name)) -;; (host-name (srvdat-host srvdat)) -;; (soc (srvdat-soc srvdat))) -;; -;; ;; start the queue processor (save for second round of development) -;; ;; -;; (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) -;; ;; msg is an alist -;; ;; 'r host:port <== where to return the data -;; ;; 'p params <== data to apply the command to -;; ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default -;; ;; 'c command <== look up the function to call using this key -;; ;; -;; (let loop ((msg-in (nn-recv soc))) -;; (if (not (equal? msg-in "quit")) -;; (let* ((dat (decode msg-in)) -;; (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client -;; (params (alist-ref 'p dat)) -;; (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) -;; (all-good (and host-port params command (hash-table-exists? *commands* command)))) -;; (if all-good -;; (let ((cmddat (make-qitem -;; command: command -;; host-port: host-port -;; params: params))) -;; (queue-push cmddat) ;; put request into the queue -;; (nn-send soc "queued")) ;; reply with "queued" -;; (print "ERROR: ["(common:human-time)"] BAD request " dat)) -;; (loop (nn-recv soc))))) -;; (nn-close soc))) - ;;====================================================================== ;; D A S H B O A R D U S E R V I E W S ;;====================================================================== ;;====================================================================== @@ -3347,11 +3015,17 @@ (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) ((not (file-read-access? pktsdir)) (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) (else (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) - (let ((pkts (glob (conc pktsdir "/*.pkt")))) + (let ((pkts (glob (conc pktsdir "/*.pkt"))) + (sqdb (dbi:db-conn pdb)) + ) + ;; Put this in a transaction to avoid issues overloading the db + (sqlite3:with-transaction + sqdb + (lambda () (for-each (lambda (pkt) (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) (exists (lookup-by-uuid pdb uuid #f))) (if (not exists) @@ -3362,11 +3036,11 @@ (ptype (alist-ref 'T apkt))) (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") ))) - pkts))))) + pkts))))))) pktsdirs)) use-lt: use-lt)) (define (common:get-pkt-alists pkts) (map (lambda (x) Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -81,133 +81,133 @@ ;; this was cached based on results from profiling but it turned out the profiling ;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching ;; in for now but can probably take it out later. ;; -(define (debug:calc-verbosity vstr) - (or (hash-table-ref/default *verbosity-cache* vstr #f) - (let ((res (cond - ((number? vstr) vstr) - ((not (string? vstr)) 1) - ;; ((string-match "^\\s*$" vstr) 1) - (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) - (cond - ((> (length debugvals) 1) debugvals) - ((> (length debugvals) 0)(car debugvals)) - (else 1)))) - ((args:get-arg "-v") 2) - ((args:get-arg "-q") 0) - (else 1)))) - (hash-table-set! *verbosity-cache* vstr res) - res))) - -;; check verbosity, #t is ok -(define (debug:check-verbosity verbosity vstr) - (if (not (or (number? verbosity) - (list? verbosity))) - (begin - (print "ERROR: Invalid debug value \"" vstr "\"") - #f) - #t)) - -(define (debug:debug-mode n) - (cond - ((and (number? *verbosity*) ;; number number - (number? n)) - (<= n *verbosity*)) - ((and (list? *verbosity*) ;; list number - (number? n)) - (member n *verbosity*)) - ((and (list? *verbosity*) ;; list list - (list? n)) - (not (null? (lset-intersection! eq? *verbosity* n)))) - ((and (number? *verbosity*) - (list? n)) - (member *verbosity* n)))) - -(define (debug:setup) - (let ((debugstr (or (args:get-arg "-debug") - (args:get-arg "-debug-noprop") - (getenv "MT_DEBUG_MODE")))) - (set! *verbosity* (debug:calc-verbosity debugstr)) - (debug:check-verbosity *verbosity* debugstr) - ;; if we were handed a bad verbosity rule then we will override it with 1 and continue - (if (not *verbosity*)(set! *verbosity* 1)) - (if (and (not (args:get-arg "-debug-noprop")) - (or (args:get-arg "-debug") - (not (getenv "MT_DEBUG_MODE")))) - (setenv "MT_DEBUG_MODE" (if (list? *verbosity*) - (string-intersperse (map conc *verbosity*) ",") - (conc *verbosity*)))))) - -(define (debug:print n e . params) - (if (debug:debug-mode n) - (with-output-to-port (or e (current-error-port)) - (lambda () - (if *logging* - (db:log-event (apply conc params)) - (apply print params) - ))))) +;; (define (debug:calc-verbosity vstr) +;; (or (hash-table-ref/default *verbosity-cache* vstr #f) +;; (let ((res (cond +;; ((number? vstr) vstr) +;; ((not (string? vstr)) 1) +;; ;; ((string-match "^\\s*$" vstr) 1) +;; (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) +;; (cond +;; ((> (length debugvals) 1) debugvals) +;; ((> (length debugvals) 0)(car debugvals)) +;; (else 1)))) +;; ((args:get-arg "-v") 2) +;; ((args:get-arg "-q") 0) +;; (else 1)))) +;; (hash-table-set! *verbosity-cache* vstr res) +;; res))) + +;; ;; check verbosity, #t is ok +;; (define (debug:check-verbosity verbosity vstr) +;; (if (not (or (number? verbosity) +;; (list? verbosity))) +;; (begin +;; (print "ERROR: Invalid debug value \"" vstr "\"") +;; #f) +;; #t)) +;; +;; (define (debug:debug-mode n) +;; (cond +;; ((and (number? *verbosity*) ;; number number +;; (number? n)) +;; (<= n *verbosity*)) +;; ((and (list? *verbosity*) ;; list number +;; (number? n)) +;; (member n *verbosity*)) +;; ((and (list? *verbosity*) ;; list list +;; (list? n)) +;; (not (null? (lset-intersection! eq? *verbosity* n)))) +;; ((and (number? *verbosity*) +;; (list? n)) +;; (member *verbosity* n)))) +;; +;; (define (debug:setup) +;; (let ((debugstr (or (args:get-arg "-debug") +;; (args:get-arg "-debug-noprop") +;; (getenv "MT_DEBUG_MODE")))) +;; (set! *verbosity* (debug:calc-verbosity debugstr)) +;; (debug:check-verbosity *verbosity* debugstr) +;; ;; if we were handed a bad verbosity rule then we will override it with 1 and continue +;; (if (not *verbosity*)(set! *verbosity* 1)) +;; (if (and (not (args:get-arg "-debug-noprop")) +;; (or (args:get-arg "-debug") +;; (not (getenv "MT_DEBUG_MODE")))) +;; (setenv "MT_DEBUG_MODE" (if (list? *verbosity*) +;; (string-intersperse (map conc *verbosity*) ",") +;; (conc *verbosity*)))))) +;; +;; (define (debug:print n e . params) +;; (if (debug:debug-mode n) +;; (with-output-to-port (or e (current-error-port)) +;; (lambda () +;; (if *logging* +;; (db:log-event (apply conc params)) +;; (apply print params) +;; ))))) ;; Brandon's debug printer shortcut (indulge me :) -(define *BB-process-starttime* (current-milliseconds)) -(define (BB> . in-args) - (let* ((stack (get-call-chain)) - (location "??")) - (for-each - (lambda (frame) - (let* ((this-loc (vector-ref frame 0)) - (temp (string-split (->string this-loc) " ")) - (this-func (if (and (list? temp) (> (length temp) 1)) (cadr temp) "???"))) - (if (equal? this-func "BB>") - (set! location this-loc)))) - stack) - (let* ((color-on "\x1b[1m") - (color-off "\x1b[0m") - (dp-args - (append - (list 0 *default-log-port* - (conc color-on location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000) color-off " ") ) - in-args))) - (apply debug:print dp-args)))) - -(define *BBpp_custom_expanders_list* (make-hash-table)) - - - -;; register hash tables with BBpp. -(hash-table-set! *BBpp_custom_expanders_list* HASH_TABLE: - (cons hash-table? hash-table->alist)) - -;; test name converter -(define (BBpp_custom_converter arg) - (let ((res #f)) - (for-each - (lambda (custom-type-name) - (let* ((custom-type-info (hash-table-ref *BBpp_custom_expanders_list* custom-type-name)) - (custom-type-test (car custom-type-info)) - (custom-type-converter (cdr custom-type-info))) - (when (and (not res) (custom-type-test arg)) - (set! res (custom-type-converter arg))))) - (hash-table-keys *BBpp_custom_expanders_list*)) - (if res (BBpp_ res) arg))) - -(define (BBpp_ arg) - (cond - ;;((SOMESTRUCT? arg) (cons SOMESTRUCT: (SOMESTRUCT->alist arg))) - ;;((dboard:tabdat? arg) (cons dboard:tabdat: (dboard:tabdat->alist arg))) - ((hash-table? arg) - (let ((al (hash-table->alist arg))) - (BBpp_ (cons HASH_TABLE: al)))) - ((null? arg) '()) - ;;((list? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg)))) - ((pair? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg)))) - (else (BBpp_custom_converter arg)))) - -;; Brandon's pretty printer. It expands hashes and custom types in addition to regular pp -(define (BBpp arg) - (pp (BBpp_ arg))) +;; (define *BB-process-starttime* (current-milliseconds)) +;; (define (BB> . in-args) +;; (let* ((stack (get-call-chain)) +;; (location "??")) +;; (for-each +;; (lambda (frame) +;; (let* ((this-loc (vector-ref frame 0)) +;; (temp (string-split (->string this-loc) " ")) +;; (this-func (if (and (list? temp) (> (length temp) 1)) (cadr temp) "???"))) +;; (if (equal? this-func "BB>") +;; (set! location this-loc)))) +;; stack) +;; (let* ((color-on "\x1b[1m") +;; (color-off "\x1b[0m") +;; (dp-args +;; (append +;; (list 0 *default-log-port* +;; (conc color-on location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000) color-off " ") ) +;; in-args))) +;; (apply debug:print dp-args)))) +;; +;; (define *BBpp_custom_expanders_list* (make-hash-table)) +;; +;; +;; +;; ;; register hash tables with BBpp. +;; (hash-table-set! *BBpp_custom_expanders_list* HASH_TABLE: +;; (cons hash-table? hash-table->alist)) +;; +;; ;; test name converter +;; (define (BBpp_custom_converter arg) +;; (let ((res #f)) +;; (for-each +;; (lambda (custom-type-name) +;; (let* ((custom-type-info (hash-table-ref *BBpp_custom_expanders_list* custom-type-name)) +;; (custom-type-test (car custom-type-info)) +;; (custom-type-converter (cdr custom-type-info))) +;; (when (and (not res) (custom-type-test arg)) +;; (set! res (custom-type-converter arg))))) +;; (hash-table-keys *BBpp_custom_expanders_list*)) +;; (if res (BBpp_ res) arg))) +;; +;; (define (BBpp_ arg) +;; (cond +;; ;;((SOMESTRUCT? arg) (cons SOMESTRUCT: (SOMESTRUCT->alist arg))) +;; ;;((dboard:tabdat? arg) (cons dboard:tabdat: (dboard:tabdat->alist arg))) +;; ((hash-table? arg) +;; (let ((al (hash-table->alist arg))) +;; (BBpp_ (cons HASH_TABLE: al)))) +;; ((null? arg) '()) +;; ;;((list? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg)))) +;; ((pair? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg)))) +;; (else (BBpp_custom_converter arg)))) +;; +;; ;; Brandon's pretty printer. It expands hashes and custom types in addition to regular pp +;; (define (BBpp arg) +;; (pp (BBpp_ arg))) ;(use define-macro) (define-syntax inspect (syntax-rules () [(_ x) @@ -215,40 +215,40 @@ (printf "~a is: ~a\n" 'x (with-output-to-string (lambda () (BBpp x)))) ;; ) ] [(_ x y ...) (begin (inspect x) (inspect y ...))])) -(define (debug:print-error n e . params) - ;; normal print - (if (debug:debug-mode n) - (with-output-to-port (if (port? e) e (current-error-port)) - (lambda () - (if *logging* - (db:log-event (apply conc params)) - ;; (apply print "pid:" (current-process-id) " " params) - (apply print "ERROR: " params) - )))) - ;; pass important messages to stderr - (if (and (eq? n 0)(not (eq? e (current-error-port)))) - (with-output-to-port (current-error-port) - (lambda () - (apply print "ERROR: " params) - )))) - -(define (debug:print-info n e . params) - (if (debug:debug-mode n) - (with-output-to-port (if (port? e) e (current-error-port)) - (lambda () - (if *logging* - (let ((res (format#format #f "INFO: (~a) ~a" n (apply conc params)))) - (db:log-event res)) - ;; (apply print "pid:" (current-process-id) " " "INFO: (" n ") " params) ;; res) - (apply print "INFO: (" n ") " params) ;; res) - ))))) - +;; (define (debug:print-error n e . params) +;; ;; normal print +;; (if (debug:debug-mode n) +;; (with-output-to-port (if (port? e) e (current-error-port)) +;; (lambda () +;; (if *logging* +;; (db:log-event (apply conc params)) +;; ;; (apply print "pid:" (current-process-id) " " params) +;; (apply print "ERROR: " params) +;; )))) +;; ;; pass important messages to stderr +;; (if (and (eq? n 0)(not (eq? e (current-error-port)))) +;; (with-output-to-port (current-error-port) +;; (lambda () +;; (apply print "ERROR: " params) +;; )))) +;; +;; (define (debug:print-info n e . params) +;; (if (debug:debug-mode n) +;; (with-output-to-port (if (port? e) e (current-error-port)) +;; (lambda () +;; (if *logging* +;; (let ((res (format#format #f "INFO: (~a) ~a" n (apply conc params)))) +;; (db:log-event res)) +;; ;; (apply print "pid:" (current-process-id) " " "INFO: (" n ") " params) ;; res) +;; (apply print "INFO: (" n ") " params) ;; res) +;; ))))) +;; ;; if a value is printable (i.e. string or number) return the value ;; else return an empty string (define-inline (printable val) (if (or (number? val)(string? val)) val "")) Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -17,21 +17,77 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit commonmod)) +(declare (uses debugprint)) (use srfi-69) (module commonmod * -(import scheme chicken data-structures extras files) -(import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 srfi-69 - md5 message-digest - regex srfi-1) +(import scheme) +(cond-expand + (chicken-4 + + (import chicken + ports + + (prefix sqlite3 sqlite3:) + data-structures + extras + files + matchable + md5 + message-digest + pathname-expand + posix + posix-extras + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + + debugprint + ) + (use srfi-69)) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.io + chicken.pathname + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + + matchable + md5 + message-digest + pathname-expand + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + system-information + ))) ;;====================================================================== ;; CONTENTS ;; ;; config file utils @@ -41,10 +97,16 @@ ;;====================================================================== (include "megatest-version.scm") (include "megatest-fossil-hash.scm") +;; http - use the old http + in /tmp db +;; tcp - use tcp transport with cachedb db +;; nfs - use direct to disk access (read-only) +;; +(define rmt:transport-mode (make-parameter 'tcp)) + (define (get-full-version) (conc megatest-version "-" megatest-fossil-hash)) (define (version-signature) (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) @@ -59,10 +121,22 @@ (begin (hash-table-set! *common:denoise* key currtime) #t) #f))) +;; KEEP THIS ONE +;; +;; client:get-signature + +(define *my-client-signature* #f) + +(define (client:get-signature) + (if *my-client-signature* *my-client-signature* + (let ((sig (conc (get-host-name) " " (current-process-id)))) + (set! *my-client-signature* sig) + *my-client-signature*))) + ;;====================================================================== ;; config file utils ;;====================================================================== (define (lookup cfgdat section var) @@ -136,10 +210,58 @@ ;;====================================================================== ;; misc conversion, data manipulation functions ;;====================================================================== +;;====================================================================== +;; return first command that exists, else #f +;; +(define (common:which cmds) + (if (null? cmds) + #f + (let loop ((hed (car cmds)) + (tal (cdr cmds))) + (let ((res (with-input-from-pipe (conc "which " hed) read-line))) + (if (and (string? res) + (file-exists? res)) + res + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + +(define (common:get-megatest-exe) + (let* ((mtexe (or (get-environment-variable "MT_MEGATEST") + (common:which '("megatest")) + "megatest"))) + (if (file-exists? mtexe) + (realpath mtexe) + mtexe))) + +(define (common:get-megatest-exe-dir) + (let* ((mtexe (common:get-megatest-exe))) + (pathname-directory mtexe))) + +;; more generic and comprehensive version of get-megatest-exe +;; +(define (common:get-mtexe) + (let* ((mtpathdir (common:get-megatest-exe-dir))) + (or (common:get-megatest-exe) + (if mtpathdir + (conc mtpathdir"/megatest") + #f) + "megatest"))) + +(define (common:get-megatest-exe-path) + (let* ((mtpathdir (common:get-megatest-exe-dir))) + (conc mtpathdir":"(get-environment-variable "PATH") ":."))) + +(cond-expand + (chicken-4 + (define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) ))) + (chicken-5 + (define (realpath x) (normalize-pathname (pathname-expand (or x "/dev/null")))))) + ;; if it looks like a number -> convert it to a number, else return it ;; (define (lazy-convert inval) (let* ((as-num (if (string? inval)(string->number inval) #f))) (or as-num inval))) @@ -161,10 +283,40 @@ (filter (lambda (x) (not (string-match "^\\s*" x))) val-list)) '()))) +(define (get-cpu-load) + (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines))) + (map string->number (string-split load-info)))) + +(define *current-host-cores* #f) + +(define (get-current-host-cores) + (or *current-host-cores* + (let ((cpu-info (with-input-from-file "/proc/cpuinfo" read-lines))) + (let loop ((lines cpu-info)) + (if (null? lines) + 1 ;; gotta be at least one! + (let* ((inl (car lines)) + (tail (cdr lines)) + (parts (string-split inl))) + (match parts + (("cpu" "cores" ":" num) (string->number num)) + (else (loop tail))))))))) + +(define (number-of-processes-running processname) + (with-input-from-pipe + (conc "ps -def | egrep \""processname"\" |wc -l") + (lambda () + (string->number (read-line))))) + +;; get the normalized (i.e. load / numcpus) for *this* host +;; +(define (get-normalized-cpu-load) + (/ (get-cpu-load)(get-current-host-cores))) + ;;====================================================================== ;; testsuite and area utilites ;;====================================================================== (define (get-testsuite-name toppath configdat) @@ -208,18 +360,387 @@ (let ((adat (get-section cfgdat "areas"))) (map (lambda (entry) `(,(car entry) . ,(val->alist (cadr entry)))) adat))) + +;;====================================================================== +;; time utils +;;====================================================================== + +(define (common:human-time) + (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) + +;;====================================================================== +;; T I M E A N D D A T E +;;====================================================================== + +;;====================================================================== +;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 +(define (common:hms-string->seconds tstr) + (let ((parts (string-split-fields "\\w+" tstr)) + (time-secs 0) + ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks + (trx (regexp "^(\\d+)([smhdMyw])$"))) + (for-each (lambda (part) + (let ((match (string-match trx part))) + (if match + (let ((val (string->number (cadr match))) + (unt (caddr match))) + (if val + (set! time-secs (+ time-secs (* val + (case (string->symbol unt) + ((s) 1) + ((m) 60) ;; minutes + ((h) 3600) + ((d) 86400) + ((w) 604800) + ((M) 2628000) ;; aproximately one month + ((y) 31536000) + (else + 0))))))) + ;; (print "ERROR: can't parse timestring "tstr", component "part) + ;; can't (yet) use debugprint. rely on -show-config for user to find errors + ))) + parts) + time-secs)) + +(define (seconds->hr-min-sec secs) + (let* ((hrs (quotient secs 3600)) + (min (quotient (- secs (* hrs 3600)) 60)) + (sec (- secs (* hrs 3600)(* min 60)))) + (conc (if (> hrs 0)(conc hrs "hr ") "") + (if (> min 0)(conc min "m ") "") + sec "s"))) + +(define (seconds->time-string sec) + (time->string + (seconds->local-time sec) "%H:%M:%S")) + +(define (seconds->work-week/day-time sec) + (time->string + (seconds->local-time sec) "ww%V.%u %H:%M")) + +(define (seconds->work-week/day sec) + (time->string + (seconds->local-time sec) "ww%V.%u")) + +(define (seconds->year-work-week/day sec) + (time->string + (seconds->local-time sec) "%yww%V.%w")) + +(define (seconds->year-work-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yww%V.%w %H:%M")) + +(define (seconds->year-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yw%V.%w %H:%M")) + +(define (seconds->quarter sec) + (case (string->number + (time->string + (seconds->local-time sec) + "%m")) + ((1 2 3) 1) + ((4 5 6) 2) + ((7 8 9) 3) + ((10 11 12) 4) + (else #f))) + +;;====================================================================== +;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch +;; +(define (common:date-time->seconds datetime) + (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) + +;;====================================================================== +;; given span of seconds tstart to tend +;; find start time to mark and mark delta +;; +(define (common:find-start-mark-and-mark-delta tstart tend) + (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... + (result #f) + (min 60) + (hr (* 60 60)) + (day (* 24 hr)) + (yr (* 365 day)) ;; year + (mo (/ yr 12)) + (wk (* day 7))) + (for-each + (lambda (max-blks) + (for-each + (lambda (span) ;; 5 2 1 + (if (not result) + (for-each + (lambda (timeunit timesym) ;; year month day hr min sec + (if (not result) + (let* ((time-blk (* span timeunit)) + (num-blks (quotient deltat time-blk))) + (if (and (> num-blks 4)(< num-blks max-blks)) + (let ((first (* (quotient tstart time-blk) time-blk))) + (set! result (list span timeunit time-blk first timesym)) + ))))) + (list yr mo wk day hr min 1) + '( y mo w d h m s)))) + (list 8 6 5 2 1))) + '(5 10 15 20 30 40 50 500)) + (if values + (apply values result) + (values 0 day 1 0 'd)))) + +;;====================================================================== +;; given x y lim return the cron expansion +;; +(define (common:expand-cron-slash x y lim) + (let loop ((curr x) + (res `())) + (if (< curr lim) + (loop (+ curr y) (cons curr res)) + (reverse res)))) + +;;====================================================================== +;; expand a complex cron string to a list of cron strings +;; +;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c +;; +;; NOTE: with flatten a lot of the crud below can be factored down. +;; +(define (common:cron-expand cron-str) + (if (list? cron-str) + (flatten + (fold (lambda (x res) + (if (list? x) + (let ((newres (map common:cron-expand x))) + (append x newres)) + (cons x res))) + '() + cron-str)) ;; (map common:cron-expand cron-str)) + (let ((cron-items (string-split cron-str)) + (slash-rx (regexp "(\\d+)/(\\d+)")) + (comma-rx (regexp ".*,.*")) + (max-vals '((min . 60) + (hour . 24) + (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations + (month . 12) + (dayofweek . 7)))) + (if (< (length cron-items) 5) ;; bad spec + cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it + (let loop ((hed (car cron-items)) + (tal (cdr cron-items)) + (type 'min) + (type-tal '(hour dayofmonth month dayofweek)) + (res '())) + (regex-case + hed + (slash-rx ( _ base incr ) (let* ((basen (string->number base)) + (incrn (string->number incr)) + (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) + (new-list-crons (fold (lambda (x myres) + (cons (conc (if (null? res) + "" + (conc (string-intersperse res " ") " ")) + x " " (string-intersperse tal " ")) + myres)) + '() expanded-vals))) + ;; (print "new-list-crons: " new-list-crons) + ;; (fold (lambda (x res) + ;; (if (list? x) + ;; (let ((newres (map common:cron-expand x))) + ;; (append x newres)) + ;; (cons x res))) + ;; '() + (flatten (map common:cron-expand new-list-crons)))) + ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) + (else (if (null? tal) + cron-str + (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) + +;;====================================================================== +;; given a cron string and the last time event was processed return #t to run or #f to not run +;; +;; min hour dayofmonth month dayofweek +;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 +;; +;; #t => yes, run the job +;; #f => no, do not run the job +;; +(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. + (let* ((cron-items (map string->number (string-split cron-str))) + (now-seconds (or now-seconds-in (current-seconds))) + (now-time (seconds->local-time now-seconds)) + (last-done-time (seconds->local-time last-done)) + (all-times (make-hash-table))) + ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) + (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings + #f + (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) + cron-items) + ;; 0 1 2 3 4 5 6 + ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) + (vector->list now-time)) + ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) + (vector->list last-done-time))) + ;; create all possible time slots + ;; remove invalid slots due to (for example) day of week + ;; get the start and end entries for the ref-seconds (current) time + ;; if last-done > ref-seconds => this is an ERROR! + ;; does the last-done time fall in the legit region? + ;; yes => #f do not run again this command + ;; no => #t ok to run the command + (for-each ;; month + (lambda (month) + (for-each ;; dayofmonth + (lambda (dom) + (for-each + (lambda (hr) ;; hour + (for-each + (lambda (minute) ;; minute + (let ((copy-now (apply vector (vector->list now-time)))) + (vector-set! copy-now 0 0) ;; force seconds to zero + (vector-set! copy-now 1 minute) + (vector-set! copy-now 2 hr) + (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced + (vector-set! copy-now 4 month) + (let* ((copy-now-secs (local-time->seconds copy-now)) + (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector + (if (or (not cdayofweek) + (equal? (vector-ref new-copy 6) + cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified + (if (or (not cdayofmonth) + (equal? (vector-ref new-copy 3) + (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified + (hash-table-set! all-times copy-now-secs new-copy)))))) + (if cmin + `(,cmin) ;; if given cmin, have to use it + (list (- nmin 1) nmin (+ nmin 1))))) ;; minute + (if chour + `(,chour) + (list (- nhour 1) nhour (+ nhour 1))))) ;; hour + (if cdayofmonth + `(,cdayofmonth) + (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) + (if cmonth + `(,cmonth) + (list (- nmonth 1) nmonth (+ nmonth 1)))) + (let ((before #f) + (is-in #f)) + (for-each + (lambda (moment) + (if (and before + (<= before now-seconds) + (>= moment now-seconds)) + (begin + ;; (print) + ;; (print "Before: " (time->string (seconds->local-time before))) + ;; (print "Now: " (time->string (seconds->local-time now-seconds))) + ;; (print "After: " (time->string (seconds->local-time moment))) + ;; (print "Last: " (time->string (seconds->local-time last-done))) + (if (< last-done before) + (set! is-in before)) + )) + (set! before moment)) + (sort (hash-table-keys all-times) <)) + is-in))))) + +(define (common:extended-cron cron-str now-seconds-in last-done) + (let ((expanded-cron (common:cron-expand cron-str))) + (if (string? expanded-cron) + (common:cron-event expanded-cron now-seconds-in last-done) + (let loop ((hed (car expanded-cron)) + (tal (cdr expanded-cron))) + (if (common:cron-event hed now-seconds-in last-done) + #t + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + + ;;====================================================================== ;; misc stuff ;;====================================================================== -;; (define (debug:print . params) #f) -;; (define (debug:print-info . params) #f) -;; -;; (define (set-functions dbgp dbgpinfo) -;; (set! debug:print dbgp) -;; (set! debug:print-info dbgpinfo)) +(define (common:get-signature str) + (message-digest-string (md5-primitive) str)) + +;;====================================================================== +;; hash of hashs +;;====================================================================== + +(define (db:hoh-set! dat key1 key2 val) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (if subhash + (hash-table-set! subhash key2 val) + (begin + (hash-table-set! dat key1 (make-hash-table)) + (db:hoh-set! dat key1 key2 val))))) + +(define (db:hoh-get dat key1 key2) + (let* ((subhash (hash-table-ref/default dat key1 #f))) + (and subhash + (hash-table-ref/default subhash key2 #f)))) + +;;====================================================================== +;; when called from a wrapper I need sometimes to find the calling +;; wrapper, this is for dashboard to find the correct megatest. +;; +(define (common:find-local-megatest #!optional (progname "megatest")) + (let ((res (filter file-exists? + (map (lambda (updir) + (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) (conc updir progname)) + ((mtest) (conc updir progname)) + ((dashboard) progname) + (else exe))))) + '("../../" "../"))))) + (if (null? res) + (begin + ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path") + progname) + (car res)))) + +(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f)) + (let ((inp #f)) + (handle-exceptions + exn + (begin + (close-input-port inp) + (if msg-proc + (msg-proc) + (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn)) + default) + (set! inp (open-input-pipe ssh-command)) + (with-input-from-port inp + (lambda () + (let ((res (proc))) + (close-input-port inp) + res)))))) + +;; this is a close duplicate of: +;; process:alist-on-host? +;; process:alive +;; +(define (commonmod:is-test-alive host pid) + (let* ((same-host (equal? host (get-host-name))) + (cmd (conc + (if same-host "" (conc "ssh "host" ")) + "pstree -A "pid))) + (if (and host pid + (not (equal? host "n/a"))) + + (let* ((output (if same-host + (with-input-from-pipe cmd read-lines) + (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines))) + (debug:print 2 *default-log-port* "Running " cmd " received " output) + (if (eq? (length output) 0) + #f + #t)) + #t))) ;; assuming bad query is about a live test is likely not the right thing to do? + ) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -25,10 +25,19 @@ (use regex regex-case matchable) ;; directory-utils) (declare (unit configf)) (declare (uses process)) (declare (uses env)) (declare (uses keys)) +(declare (uses debugprint)) +(declare (uses mtargs)) +(declare (uses mtargs.import)) +(declare (uses common)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) +(import commonmod + (prefix mtargs args:) + debugprint) (include "common_records.scm") ;; return list (path fullpath configname) (define (find-config configname #!key (toppath #f)) @@ -98,10 +107,12 @@ (define (configf:system ht cmd) (system cmd) ) +(define configf:imports "(import commonmod (prefix mtargs args:))") + (define (configf:process-line l ht allow-system #!key (linenum #f)) (let loop ((res l)) (if (string? res) (let ((matchdat (string-search configf:var-expand-regex res))) (if matchdat @@ -111,11 +122,11 @@ (poststr (list-ref matchdat 4)) (result #f) (start-time (current-seconds)) (cmdsym (string->symbol cmdtype)) (fullcmd (case cmdsym - ((scheme scm) (conc "(lambda (ht)" cmd ")")) + ((scheme scm) (conc "(lambda (ht)(begin " configf:imports cmd "))")) ((system) (conc "(lambda (ht)(configf:system ht \"" cmd "\"))")) ((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))")) ((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))")) ((getenv gv) (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))")) ((mtrah) (conc "(lambda (ht)" Index: dashboard-context-menu.scm ================================================================== --- dashboard-context-menu.scm +++ dashboard-context-menu.scm @@ -21,32 +21,38 @@ ;;====================================================================== ;; implementation of context menu that pops up on ;; right click on test cell in Runs & Runs Summary Tabs ;;====================================================================== +(declare (unit dashboard-context-menu)) +(declare (uses common)) +(declare (uses commonmod)) +(declare (uses db)) +(declare (uses gutils)) +(declare (uses rmt)) +(declare (uses rmtmod)) +(declare (uses ezsteps)) +;; (declare (uses sdb)) +;; (declare (uses filedb)) +(declare (uses subrun)) + (use format fmt) (require-library iup) (import (prefix iup iup:)) (use canvas-draw) (use srfi-1 posix regex regex-case srfi-69) (use (prefix sqlite3 sqlite3:)) -(declare (unit dashboard-context-menu)) -(declare (uses common)) -(declare (uses db)) -(declare (uses gutils)) -(declare (uses rmt)) -(declare (uses ezsteps)) -;; (declare (uses sdb)) -;; (declare (uses filedb)) -(declare (uses subrun)) - (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") + +(import commonmod + rmtmod + debugprint) (define (dboard:launch-testpanel run-id test-id) (let* ((dboardexe (common:find-local-megatest "dashboard")) (cmd (conc dboardexe " -test " run-id "," test-id Index: dashboard-guimonitor.scm ================================================================== --- dashboard-guimonitor.scm +++ dashboard-guimonitor.scm @@ -34,10 +34,12 @@ (declare (unit dashboard-guimonitor)) (declare (uses common)) (declare (uses keys)) (declare (uses db)) (declare (uses tasks)) +(declare (uses commonmod)) +(import commonmod) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") (include "task_records.scm") Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -20,28 +20,33 @@ ;;====================================================================== ;; Test info panel ;;====================================================================== +(declare (unit dashboard-tests)) +(declare (uses common)) +(declare (uses commonmod)) +(declare (uses db)) +(declare (uses gutils)) +(declare (uses rmt)) +(declare (uses ezsteps)) +(declare (uses subrun)) +(declare (uses debugprint)) +(declare (uses rmtmod)) + (use format fmt) (require-library iup) (import (prefix iup iup:)) (use canvas-draw) (use srfi-1 posix regex regex-case srfi-69) (use (prefix sqlite3 sqlite3:)) -(declare (unit dashboard-tests)) -(declare (uses common)) -(declare (uses db)) -(declare (uses gutils)) -(declare (uses rmt)) -(declare (uses ezsteps)) -;; (declare (uses sdb)) -;; (declare (uses filedb)) -(declare (uses subrun)) +(import commonmod + rmtmod + debugprint) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") @@ -459,12 +464,11 @@ ;;====================================================================== ;; ;;====================================================================== (define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest) (let* ((db-path (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db")) - (dbstruct #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path: (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") - ;; local: #t)) + (dbstruct #f) ;; NOT USED (testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id)) (db-mod-time 0) ;; (file-modification-time db-path)) (last-update 0) ;; (current-seconds)) (request-update #t)) (if (not testdat) ADDED dashboard-transport-mode.scm.template Index: dashboard-transport-mode.scm.template ================================================================== --- /dev/null +++ dashboard-transport-mode.scm.template @@ -0,0 +1,22 @@ +;;====================================================================== +;; set up transport, db cache and sync methods +;; +;; sync-method: 'original, 'attach or 'none +;; cache-method: 'tmp or 'none +;; rmt:transport-mode: 'http, 'tcp, 'nfs +;; +;; NOTE: NOT ALL COMBINATIONS WORK +;; +;;====================================================================== + +;; uncomment this block to test without tcp or cachedb +;; (dbfile:sync-method 'none) +;; (dbfile:cache-method 'none) +;; (rmt:transport-mode 'nfs) + +;; uncomment this block to test with tcp and cachedb +(dbfile:sync-method 'none) ;; original was causing crash on start. +(dbfile:cache-method 'none) +(rmt:transport-mode 'nfs) + + Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -16,26 +16,17 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== -(use format) - -(require-library iup) -(import (prefix iup iup:)) - -(use canvas-draw) -(import canvas-draw-iup) -(use ducttape-lib) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors) ;; defstruct -(import (prefix sqlite3 sqlite3:)) -(import dbfile) - (declare (uses common)) -(declare (uses margs)) +(declare (uses mtargs)) +(declare (uses mtargs.import)) (declare (uses keys)) (declare (uses items)) +(declare (uses debugprint)) +(declare (uses debugprint.import)) (declare (uses db)) (declare (uses configf)) (declare (uses process)) (declare (uses launch)) (declare (uses runs)) @@ -44,21 +35,49 @@ (declare (uses dcommon)) (declare (uses dashboard-context-menu)) (declare (uses vg)) (declare (uses subrun)) (declare (uses mt)) -(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses rmtmod)) +(declare (uses dbfile)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) + +(use format) + +(require-library iup) +(import (prefix iup iup:)) + +(use canvas-draw) +(import canvas-draw-iup + (prefix sqlite3 sqlite3:)) + +(use ducttape-lib) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors) ;; defstruct + +(import commonmod + (prefix mtargs args:) + dbmod + dbfile + rmtmod + debugprint) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") (include "task_records.scm") (include "megatest-version.scm") (include "megatest-fossil-hash.scm") (include "vg_records.scm") +;; set some parameters here - these need to be put in something that can be loaded from other +;; executables such as dashboard and mtutil +;; +(include "dashboard-transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) +(set! rmtmod:send-receive rmt:send-receive) (define help (conc "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright (C) Matt Welland 2012-2017 @@ -70,10 +89,11 @@ -cols C : set number of columns -start-dir dir : start dashboard in the given directory -target target : filter runs tab to given target. -debug n[,n] : set debug level(s) e.g. -debug 4 or -debug 0,9 -repl : Start a chicken scheme interpreter + -mode MODE : tcp or nfs " )) ;; process args @@ -84,10 +104,11 @@ "-cols" "-test" ;; given a run id and test id, open only a test control panel on that test.. "-debug" "-start-dir" "-target" + "-mode" ;; tcp or nfs ) ;; switches (don't take arguments) (list "-h" "-skip-version-check" "-repl" @@ -94,11 +115,17 @@ "-:p" ;; ignore the built in chicken profiling switch ) args:arg-hash 0)) - +(if (args:get-arg "-mode") + (let* ((mode (string->symbol (args:get-arg "-mode")))) + (rmt:transport-mode mode))) + +(if (args:get-arg "-test") ;; need to use tcp for test control panel + (rmt:transport-mode 'tcp)) + ;; RA => Might require revert for filters ;; create a watch dog to move changes from lt/.db/*.db to megatest.db ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") @@ -204,11 +231,15 @@ tabdat)) ;; gets and calls updater list based on curr-tab-num ;; (define (dboard:common-run-curr-updaters commondat #!key (tab-num #f)) - (sync-db-to-tmp (dboard:common-get-tabdat commondat tab-num: tab-num)) + ;; (sync-db-to-tmp (dboard:common-get-tabdat commondat tab-num: tab-num)) ;; no longer applies + + ;; maybe need sleep here? + + (if (dboard:common-get-tabdat commondat tab-num: tab-num) ;; only update if there is a tabdat (let* ((tnum (or tab-num (dboard:commondat-curr-tab-num commondat))) (updaters (hash-table-ref/default (dboard:commondat-updaters commondat) tnum '()))) @@ -341,18 +372,18 @@ tests-tree ;; used in newdashboard ) ;; register tabdat with BBpp ;; this is used by BBpp (Brandon's pretty printer) to convert dboard:tabdat into a composition of lists that pp will handle -(hash-table-set! *BBpp_custom_expanders_list* TABDAT: - (cons dboard:tabdat? - (lambda (tabdat-item) - (filter - (lambda (alist-entry) - (member (car alist-entry) - '(allruns-by-id allruns))) ;; FIELDS OF INTEREST - (dboard:tabdat->alist tabdat-item))))) +;; (hash-table-set! *BBpp_custom_expanders_list* TABDAT: +;; (cons dboard:tabdat? +;; (lambda (tabdat-item) +;; (filter +;; (lambda (alist-entry) +;; (member (car alist-entry) +;; '(allruns-by-id allruns))) ;; FIELDS OF INTEREST +;; (dboard:tabdat->alist tabdat-item))))) (define (dboard:tabdat-target-string vec) (let ((targ (dboard:tabdat-target vec))) @@ -405,27 +436,29 @@ (make-dboard:runsdat runs-index: (make-hash-table) tests-index: (make-hash-table) matrix-dat: (make-sparse-array))) -;; used to keep the rundata from rmt:get-tests-for-run -;; in sync. +;; duplicated in dcommon.scm ;; -(defstruct dboard:rundat - run - tests-drawn ;; list of id's already drawn on screen - tests-notdrawn ;; list of id's NOT already drawn - rowsused ;; hash of lists covering what areas used - replace with quadtree - hierdat ;; put hierarchial sorted list here - tests ;; hash of id => testdat - ((tests-by-name (make-hash-table)) : hash-table) ;; hash of testfullname => testdat - key-vals - ((last-update 0) : number) ;; last query to db got records from before last-update - ((last-db-time 0) : number) ;; last timestamp on main.db - ((data-changed #f) : boolean) - ((run-data-offset 0) : number) ;; get only 100 items per call, set back to zero when received less than 100 items - (db-path #f)) +;; ;; used to keep the rundata from rmt:get-tests-for-run +;; ;; in sync. +;; ;; +;; (defstruct dboard:rundat +;; run +;; tests-drawn ;; list of id's already drawn on screen +;; tests-notdrawn ;; list of id's NOT already drawn +;; rowsused ;; hash of lists covering what areas used - replace with quadtree +;; hierdat ;; put hierarchial sorted list here +;; tests ;; hash of id => testdat +;; ((tests-by-name (make-hash-table)) : hash-table) ;; hash of testfullname => testdat +;; key-vals +;; ((last-update 0) : number) ;; last query to db got records from before last-update +;; ((last-db-time 0) : number) ;; last timestamp on main.db +;; ((data-changed #f) : boolean) +;; ((run-data-offset 0) : number) ;; get only 100 items per call, set back to zero when received less than 100 items +;; (db-path #f)) ;; for the new runs view lets build up a few new record types and then consolidate later ;; ;; this is a two level deep pipeline for the incoming data: ;; sql query data ==> filters ==> data for display @@ -491,18 +524,18 @@ duration ) ;; register dboard:rundat with BBpp ;; this is used by BBpp (Brandon's pretty printer) to convert dboard:rundat into a composition of lists that pp will handle -(hash-table-set! *BBpp_custom_expanders_list* RUNDAT: - (cons dboard:rundat? - (lambda (tabdat-item) - (filter - (lambda (alist-entry) - (member (car alist-entry) - '(run run-data-offset ))) ;; FIELDS OF INTEREST - (dboard:rundat->alist tabdat-item))))) +;; (hash-table-set! *BBpp_custom_expanders_list* RUNDAT: +;; (cons dboard:rundat? +;; (lambda (tabdat-item) +;; (filter +;; (lambda (alist-entry) +;; (member (car alist-entry) +;; '(run run-data-offset ))) ;; FIELDS OF INTEREST +;; (dboard:rundat->alist tabdat-item))))) (define (dboard:rundat-make-init #!key (run #f)(key-vals #f)(tests #f));; -100 is before time began @@ -640,11 +673,11 @@ (access-mode (dboard:tabdat-access-mode tabdat)) (num-to-get (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get") "200"))) (states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat))) (statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat))) - (do-not-use-db-file-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab + (do-not-use-db-file-timestamps #f) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab (do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab (sort-info (get-curr-sort)) (sort-by (vector-ref sort-info 1)) (sort-order (vector-ref sort-info 2)) (bubble-type (if (member sort-order '(testname)) @@ -663,12 +696,12 @@ (dboard:rundat-last-update run-dat))) (last-db-time (if do-not-use-db-file-timestamps 0 (dboard:rundat-last-db-time run-dat))) (db-path (or (dboard:rundat-db-path run-dat) - (let* ((db-dir (common:get-db-tmp-area)) - (db-pth (conc db-dir "/.megatest/main.db"))) + (let* ((db-dir (get-environment-variable "MT_RUN_AREA_HOME")) ;; (common:get-db-tmp-area)) + (db-pth (conc db-dir "/.mtdb/main.db"))) (dboard:rundat-db-path-set! run-dat db-pth) db-pth))) (db-mod-time (common:lazy-sqlite-db-modification-time db-path)) (db-modified (>= db-mod-time last-db-time)) (multi-get (> (dboard:rundat-run-data-offset run-dat) 0)) ;; multi-get in progress @@ -1076,11 +1109,11 @@ ;; - not appropriate for where all the runs are needed ;; (define (update-buttons tabdat uidat numruns numtests) (let* ((runs (if (> (length (dboard:tabdat-allruns tabdat)) numruns) (take-right (dboard:tabdat-allruns tabdat) numruns) - (pad-list (dboard:tabdat-allruns tabdat) numruns))) + (pad-list (dboard:tabdat-allruns tabdat) numruns))) (lftcol (dboard:uidat-get-lftcol uidat)) (tableheader (dboard:uidat-get-header uidat)) (table (dboard:uidat-get-runsvec uidat)) (coln 0) (all-test-names (make-hash-table)) @@ -3100,21 +3133,18 @@ (hash-table-ref/default (dboard:tabdat-last-db-update tabdat) context 0)) (define (dboard:set-last-db-update! tabdat context newtime) (hash-table-set! (dboard:tabdat-last-db-update tabdat) context newtime)) -;; DOES NOT WORK RELIABLY WITH /tmp WAL mode files. Timestamps only change when the db -;; is closed (I think). If db dir starts with /tmp always return true ;; (define (dashboard:database-changed? commondat tabdat #!key (context-key 'default)) (let* ((run-update-time (current-seconds)) - (dbdir (dboard:tabdat-dbdir tabdat)) + (dbdir *toppath*) (modtime (dashboard:get-youngest-run-db-mod-time dbdir)) (recalc (dashboard:recalc modtime (dboard:commondat-please-update commondat) (dboard:get-last-db-update tabdat context-key)))) - ;; (dboard:tabdat-last-db-update tabdat)))) (if recalc (dboard:set-last-db-update! tabdat context-key run-update-time)) (dboard:commondat-please-update-set! commondat #f) recalc)) @@ -3792,11 +3822,11 @@ (stop-the-train) (define (main) ;; (print "Starting dashboard main") - (let* ((mtdb-path (conc *toppath* "/.megatest/main.db")) + (let* ((mtdb-path (conc *toppath* "/.mtdb/main.db")) (target (args:get-arg "-target")) (commondat (dboard:commondat-make))) (if target (begin (args:remove-arg-from-ht "-target") @@ -3889,11 +3919,11 @@ ;; Sync to tmp only if in read-only mode. (define (sync-db-to-tmp tabdat) - (let* ((db-file "./.megatest/main.db")) + (let* ((db-file "./.mtdb/main.db")) (if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5))) (begin (db:multi-db-sync (db:setup #f) 'old2new) (set! last-copy-time (current-seconds)) ) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -22,13 +22,28 @@ ;; Database access ;;====================================================================== ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc +(declare (unit db)) +(declare (uses common)) +(declare (uses debugprint)) +(declare (uses dbmod)) +(declare (uses dbfile)) +(declare (uses keys)) +(declare (uses ods)) +(declare (uses mt)) +(declare (uses commonmod)) +(declare (uses mtargs)) +(declare (uses rmtmod)) + +(import commonmod + (prefix mtargs args:)) + (use (srfi 18) extras - tcp + ;; tcp stack (prefix sqlite3 sqlite3:) srfi-1 posix regex @@ -44,39 +59,37 @@ z3 typed-records matchable files) -(declare (unit db)) -(declare (uses common)) -(declare (uses dbmod)) -;; (declare (uses debugprint)) -(declare (uses dbfile)) -(declare (uses keys)) -(declare (uses ods)) -(declare (uses client)) -(declare (uses mt)) - (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") (define *number-of-writes* 0) (define *number-non-write-queries* 0) -(import dbmod) +(import debugprint) (import dbfile) +(import dbmod) +(import rmtmod) ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests ;; (defstruct dbr:counts (state #f) (status #f) (count 0)) +;; (define (db:with-db dbstruct run-id r/w proc . params) +;; (case (rmt:transport-mode) +;; ((http)(dbfile:with-db dbstruct run-id r/w proc params)) +;; ((tcp) (dbmod:with-db dbstruct run-id r/w proc params)) +;; ((nfs) (dbmod:with-db dbstruct run-id r/w proc params)) +;; (else (assert #f "FATAL: db:with-db called with non-existant transport mode")))) ;;====================================================================== ;; hash of hashs ;;====================================================================== @@ -92,19 +105,10 @@ (define (db:hoh-get dat key1 key2) (let* ((subhash (hash-table-ref/default dat key1 #f))) (and subhash (hash-table-ref/default subhash key2 #f)))) -(define (db:get-cache-stmth dbdat run-id db stmt) - (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id)) - (stmt-cache (dbr:dbdat-stmt-cache dbdat)) - (stmth (db:hoh-get stmt-cache db stmt))) - (or stmth - (let* ((newstmth (sqlite3:prepare db stmt))) - (db:hoh-set! stmt-cache db stmt newstmth) - newstmth)))) - ;;====================================================================== ;; SQLITE3 HELPERS ;;====================================================================== (define (db:general-sqlite-error-dump exn stmt . params) @@ -133,10 +137,83 @@ (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") (let* ((tmpdir (common:get-db-tmp-area))) (if (not *dbstruct-dbs*) (dbfile:setup do-sync *toppath* tmpdir) *dbstruct-dbs*))) + +;; moved from dbfile +;; +;; ADD run-id SUPPORT +;; +(define (db:create-all-triggers dbstruct) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (db:create-triggers db)))) + +(define (db:create-triggers db) + (for-each (lambda (key) + (sqlite3:execute db (cadr key))) + db:trigger-list)) + +(define (db:drop-all-triggers dbstruct) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (db:drop-triggers db)))) + +(define (db:have-incompletes? dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (or ovr-deadtime 72000))) ;; twenty hours + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) + ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") + run-id) + + ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? toplevels)) + #f + #t))))) + ;; looks up subdb and returns it, if not found then set up ;; and then return it. ;; #;(define (db:get-db dbstruct run-id) @@ -150,12 +227,12 @@ ;; Get/open a database ;; if run-id => get run specific db ;; if #f => get main db ;; if run-id is a string treat it as a filename -;; if db already open - return inmem -;; if db not open, open inmem, rundb and sync then return inmem +;; if db already open - return cachedb +;; if db not open, open cachedb, rundb and sync then return cachedb ;; inuse gets set automatically for rundb's ;; ;; (define db:get-db db:get-subdb) ;; (define (db:get-db subdb #;dbstruct run-id) ;; RENAME TO db:get-dbh @@ -359,13 +436,15 @@ (define (db:cache-for-read-only source target #!key (use-last-update #f)) (if (and (hash-table-ref/default *global-db-store* target #f) (>= (file-modification-time target)(file-modification-time source))) (hash-table-ref *global-db-store* target) (let* ((toppath (launch:setup)) - (targ-db-last-mod (if (common:file-exists? target) - (file-modification-time target) - 0)) + (targ-db-last-mod (db:get-sqlite3-mod-time target)) +;; (if (common:file-exists? target) +;; BUG: This needs to include wal mode stuff .shm etc. +;; (file-modification-time target) +;; 0)) (cache-db (or (hash-table-ref/default *global-db-store* target #f) (db:open-megatest-db path: target))) (source-db (db:open-megatest-db path: source)) (curr-time (current-seconds)) (res '()) @@ -373,95 +452,99 @@ (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) (db:sync-tables db:sync-tests-only last-update source-db cache-db) (hash-table-set! *global-db-store* target cache-db) cache-db))) -;; ;; call a proc with a cached db -;; ;; -;; (define (db:call-with-cached-db proc . params) -;; ;; first cache the db in /tmp -;; (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) -;; (fname (conc (common:get-area-path-signature) ".db")) -;; (cache-dir (common:get-create-writeable-dir -;; (list (conc "/tmp/" (current-user-name) "/" cname-part) -;; (conc "/tmp/" (current-user-name) "-" cname-part) -;; (conc "/tmp/" (current-user-name) "_" cname-part)))) -;; (megatest-db (conc *toppath* "/megatest.db"))) -;; ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) -;; (if (not cache-dir) -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") -;; (exit 1)) -;; (let* ((th1 (make-thread -;; (lambda () -;; (if (and (common:file-exists? megatest-db) -;; (file-write-access? megatest-db)) -;; (begin -;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* -;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) -;; "call-with-cached-db sync-to-megatest.db")) -;; (cache-db (db:cache-for-read-only -;; megatest-db -;; (conc cache-dir "/" fname) -;; use-last-update: #t))) -;; (thread-start! th1) -;; (apply proc cache-db params) -;; )))) - - - - -(define (db:all-db-sync dbstruct) - (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) - (data-synced 0) ;; count of changed records - (tmp-area (common:get-db-tmp-area)) - (dbfiles (glob (conc tmp-area"/.megatest/*.db"))) - (sync-durations (make-hash-table)) - (no-sync-db (db:open-no-sync-db))) +(define (db:get-sqlite3-mod-time fname) + (let* ((wal-file (conc fname "-wal")) + (shm-file (conc fname "-shm")) + (get-mtime (lambda (f) + (if (and (file-exists? f) + (file-read-access? f)) + (file-modification-time f) + 0)))) + (max (get-mtime fname) + (get-mtime wal-file) + (get-mtime shm-file)))) + +;; (define (db:all-db-sync dbstruct) +;; (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) +;; (data-synced 0) ;; count of changed records +;; (tmp-area (common:get-db-tmp-area)) +;; (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) +;; (sync-durations (make-hash-table)) +;; (no-sync-db (db:open-no-sync-db))) +;; (for-each +;; (lambda (file) ;; tmp db file +;; (debug:print-info 3 *default-log-port* "file: " file) +;; (let* ((fname (conc (pathname-file file) ".db")) ;; fname is tmp db file +;; (wal-file (conc fname "-wal")) +;; (shm-file (conc fname "-shm")) +;; (fulln (conc *toppath*"/,mtdb/"fname)) ;; fulln is nfs db name +;; (wal-time (if (file-exists? wal-file) +;; (file-modification-time wal-file) +;; 0)) +;; (shm-time (if (file-exists? shm-file) +;; (file-modification-time shm-file) +;; 0)) +;; (time1 (db:get-sqlite3-mod-time file)) +;; ;; (if (file-exists? file) ;; time1 is the max itime of the tmp db, -wal and -shm files. +;; ;; (max (file-modification-time file) wal-time shm-time) +;; ;; (begin +;; ;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "file) +;; ;; 1))) +;; (time2 (db:get-sqlite3-mod-time fulln)) +;; ;; (if (file-exists? fulln) ;; time2 is nfs file time +;; ;; (file-modification-time fulln) +;; ;; (begin +;; ;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln) +;; ;; 0))) +;; (changed (> (- time1 time2) (+ (random 5) 1))) ;; it has been at some few seconds since last synced +;; (changed10 (> (- time1 time2) 10)) ;; it has been at least ten seconds since sync'd +;; (jfile-exists (file-exists? (conc file"-journal"))) ;; i.e. are we busy? +;; (do-cp (cond +;; ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover +;; (cons #t (conc "File "fulln" not found! Copying "fname" to "fulln))) +;; ((and (not jfile-exists) changed) +;; (cons #t "not busy, changed")) ;; not busy and changed +;; ((and jfile-exists changed10) +;; (cons #t "busy but not synced in a while")) ;; busy but not sync'd in over 10 seconds +;; ((and changed *time-to-exit*) +;; (cons #t "Time to exit, forced final sync")) ;; last sync +;; (else +;; (cons #f "No sync needed"))))) +;; (if (car do-cp) +;; (let* ((start-time (current-milliseconds)) +;; (fname (pathname-file file)) +;; (runid (if (string= fname "main") #f (string->number fname)))) +;; (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: " +;; fname", delta: " (- time1 time2) " seconds, reason: "(cdr do-cp)) +;; (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db) +;; (hash-table-set! sync-durations (conc fname".db") +;; (- (current-milliseconds) start-time))) +;; (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date") +;; ))) +;; dbfiles) +;; ;; WHY does the dbdat need to be added back? +;; (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)) +;; ) +;; #t) + +(define (db:kill-servers) + (let* ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath*)) (for-each - (lambda (file) - (debug:print-info 3 *default-log-port* "file: " file) - (let* ((fname (conc (pathname-file file) ".db")) - (fulln (conc *toppath*"/.megatest/"fname)) - (time1 (if (file-exists? file) - (file-modification-time file) - (begin - (debug:print-info 2 *default-log-port* "Sync - I do not see file "file) - 1))) - (time2 (if (file-exists? fulln) - (file-modification-time fulln) - (begin - (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln) - 0))) - (changed (> (- time1 time2) (+ (random 5) 1))) ;; it has been at some few seconds since last synced - (changed10 (> (- time1 time2) 10)) ;; it has been at least ten seconds since sync'd - (jfile-exists (file-exists? (conc file"-journal"))) ;; i.e. are we busy? - (do-cp (cond - ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover - (cons #t (conc "File "fulln" not found! Copying "fname" to "fulln))) - ((and (not jfile-exists) changed) - (cons #t "not busy, changed")) ;; not busy and changed - ((and jfile-exists changed10) - (cons #t "busy but not synced in a while")) ;; busy but not sync'd in over 10 seconds - ((and changed *time-to-exit*) - (cons #t "Time to exit, forced final sync")) ;; last sync - (else - (cons #f "No sync needed"))))) - (if (car do-cp) - (let* ((start-time (current-milliseconds)) - (fname (pathname-file file)) - (runid (if (string= fname "main") #f (string->number fname)))) - (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: " - fname", delta: " (- time1 time2) " seconds, reason: "(cdr do-cp)) - (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db) - (hash-table-set! sync-durations (conc fname".db") - (- (current-milliseconds) start-time))) - (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date") - ))) - dbfiles) - (if dbdat (dbfile:add-dbdat dbstruct #f dbdat))) - #t) + (lambda (server) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) server)) + (if (and host pid) + (tasks:kill-server host pid))))) + servers) + (delete-file* (common:get-sync-lock-filepath)))) ;; options: ;; ;; 'killservers - kills all servers ;; 'dejunk - removes junk records @@ -473,117 +556,97 @@ ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) (data-synced 0) ;; count of changed records - (tmp-area (common:get-db-tmp-area)) - (old2new (member 'old2new options)) - (dejunk (member 'dejunk options)) - (killservers (member 'killservers options)) - (servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*)) - (src-area (if old2new *toppath* tmp-area)) - (dest-area (if old2new tmp-area *toppath*)) - (dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db")))) - (keys (db:get-keys dbstruct)) - (sync-durations (make-hash-table))) - - - (if killservers - (begin - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) server)) - (if (and host pid) - (tasks:kill-server host pid))))) - servers) - (delete-file* (common:get-sync-lock-filepath)) - ) - ) - (for-each - (lambda (srcfile) - (debug:print-info 3 *default-log-port* "file: " srcfile) - (let* ((fname (conc (pathname-file srcfile) ".db")) - (basename (pathname-file srcfile)) - (run-id (if (string= basename "main") #f (string->number basename))) - (destfile (conc dest-area "/.megatest/" fname)) - (dest-directory (conc dest-area "/.megatest/")) - (dummy (debug:print-info 2 *default-log-port* "destfile = " destfile)) - (dummy2 (debug:print-info 2 *default-log-port* "dejunk = " dejunk)) - (time1 (file-modification-time srcfile)) - (time2 (if (file-exists? destfile) - (begin - (debug:print-info 2 *default-log-port* "destfile " destfile " exists") - (file-modification-time destfile) - ) - (begin - (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile) - 0))) - (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds - - (do-cp (cond - ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover - (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile) - (system (conc "/bin/mkdir -p " dest-directory)) - (system (conc "/bin/cp " srcfile " " destfile)) - #t) - (changed ;; (and changed - ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed. - #t) - ((and changed *time-to-exit*) ;; last sync - #t) - (else - #f)))) - (if (or dejunk do-cp) - (let* ( - (start-time (current-milliseconds)) - - (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) - (mtdb (dbr:subdb-mtdbdat subdb)) - (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc)) - - ) - (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds") - - (if old2new - (begin - (if dejunk (db:clean-up run-id mtdb)) - (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f mtdb tmpdb) - ) - (begin - (if dejunk (db:clean-up run-id tmpdb)) - (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb) - ) - ) - (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time))) - (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date") - ) - ) - ) - dbfiles - ) - data-synced - ) -) + (tmp-area (common:get-db-tmp-area)) + (old2new (member 'old2new options)) + (dejunk (member 'dejunk options)) + (killservers (member 'killservers options)) + (src-area (if old2new *toppath* tmp-area)) + (dest-area (if old2new tmp-area *toppath*)) + (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db")) (glob (conc tmp-area "/.mtdt/*.db")))) + (keys (db:get-keys dbstruct)) + (sync-durations (make-hash-table))) + + ;; kill servers + (if killservers (db:kill-servers)) + + (if (not dbfiles) + (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.mtdb")) + (for-each + (lambda (srcfile) + (debug:print-info 3 *default-log-port* "file: " srcfile) + (let* ((fname (conc (pathname-file srcfile) ".db")) + (basename (pathname-file srcfile)) + (run-id (if (string= basename "main") #f (string->number basename))) + (destfile (conc dest-area "/.mtdb/" fname)) + (dest-directory (conc dest-area "/.mtdb/")) + (time1 (file-modification-time srcfile)) + (time2 (if (file-exists? destfile) + (begin + (debug:print-info 2 *default-log-port* "destfile " destfile " exists") + (file-modification-time destfile)) + (begin + (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile) + 0))) + (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds + + (do-cp (cond + ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover + (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile) + ;; TODO: Need to fix this for WAL mod. Can't just copy. + (system (conc "/bin/mkdir -p " dest-directory)) + (system (conc "/bin/cp " srcfile " " destfile)) + #t) + (changed ;; (and changed + #t) + ((and changed *time-to-exit*) ;; last sync + #t) + (else + #f)))) + (if (or dejunk do-cp) + (let* ((start-time (current-milliseconds)) + ;; subdb is misnamed - should be dbdat (I think...) + (subdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc)) + ;; (or (dbfile:get-subdb dbstruct run-id) + ;; (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) + (mtdb (dbr:subdb-mtdbdat subdb)) + ;; + ;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .mtdb/.db + ;; + (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) + + (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds") + (if old2new + (begin + (if dejunk (db:clean-up run-id mtdb)) + (db:sync-tables (db:sync-all-tables-list + dbstruct + (db:get-keys dbstruct)) + #f mtdb tmpdb)) + (begin + (if dejunk (db:clean-up run-id tmpdb)) + (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb))) + (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time))) + (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date")))) + dbfiles)) + data-synced)) ;; Sync all changed db's ;; (define (db:tmp->megatest.db-sync dbstruct run-id last-update) (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))) (res '())) (for-each (lambda (subdb) - (let* ((dbname (db:run-id->dbname run-id)) - (mtdb (dbr:subdb-mtdb subdb)) + (let* ((mtdb (dbr:subdb-mtdb subdb)) (tmpdb (db:get-subdb dbstruct run-id)) (refndb (dbr:subdb-refndb subdb)) (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) + ;; BUG: verify this is really needed (dbfile:add-dbdat dbstruct run-id tmpdb) (set! res (cons newres res)))) subdbs) res)) @@ -648,13 +711,23 @@ ;; exn ;; (begin ;; (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'") ;; (exit)) (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") - (for-each (lambda (key) - (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) - keys) + (for-each + (lambda (key) + (let* ((fieldname #f) + (fieldtype #f)) + (sqlite3:for-each-row + (lambda (fn ft) + (set! fieldname fn) + (set! fieldtype ft)) + db + "SELECT fieldname,fieldtype FROM keys WHERE fieldname=?" key) + (if (not fieldname) + (sqlite3:execute db "INSERT INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")))) + keys) (sqlite3:execute db (conc "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " fieldstr (if havekeys "," "") " runname TEXT DEFAULT 'norun', contour TEXT DEFAULT '', @@ -745,11 +818,21 @@ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, CONSTRAINT metadat_constraint UNIQUE (var));") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") ;; Must do this *after* running patch db !! No more. ;; cannot use db:set-var since it will deadlock, hardwire the code here - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature)) + (let* ((prev-version #f) + (curr-version (common:version-signature))) + (sqlite3:for-each-row + (lambda (ver) + (set! prev-version ver)) + db + "SELECT val FROM metadat WHERE var='MEGATEST_VERSION';") + (if prev-version + (if (not (equal? prev-version curr-version)) + (sqlite3:execute db "UPDATE metadat SET val=? WHERE var=?;" curr-version "MEGATEST_VERSION")) + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" curr-version) )) (debug:print-info 11 *default-log-port* "db:initialize END") ;; )))) ;;====================================================================== ;; R U N S P E C I F I C D B ;;====================================================================== @@ -840,19 +923,21 @@ test_id INTEGER, update_time TIMESTAMP, cpuload INTEGER DEFAULT -1, diskfree INTEGER DEFAULT -1, diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);") + run_duration INTEGER DEFAULT 0, + last_update INTEGER DEFAULT (strftime('%s','now')));") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives ( id INTEGER PRIMARY KEY, test_id INTEGER, state TEXT DEFAULT 'new', status TEXT DEFAULT 'n/a', archive_type TEXT DEFAULT 'bup', du INTEGER, - archive_path TEXT);"))) + archive_path TEXT, + last_update INTEGER DEFAULT (strftime('%s','now')));"))) (db:create-triggers db) db)) ;; ) ;;====================================================================== ;; A R C H I V E S @@ -886,10 +971,11 @@ "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND last_df > ?;") dneeded)) + ;; BUG: Verfify this is really needed (dbfile:add-dbdat dbstruct #f dbdat) blocks)) ;; returns id of the record, register a disk allocated to archiving and record it's last known ;; available space @@ -952,11 +1038,11 @@ ;; (define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) (db:with-db dbstruct run-id - #f + #t (lambda (dbdat db) (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;" archive-block-id test-id)))) ;; Look up the archive block info given a block-id @@ -990,220 +1076,64 @@ ;;====================================================================== ;; M A I N T E N A N C E ;;====================================================================== -(define (db:have-incompletes? dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) - (deadtime (if (and deadtime-str - (string->number deadtime-str)) - (string->number deadtime-str) - 72000))) ;; twenty hours - (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - (if (and (null? incompleted) - (null? oldlaunched) - (null? toplevels)) - #f - #t))))) - -(define (db:get-status-from-final-status-file run-dir) - (let ((infile (conc run-dir "/.final-status"))) - ;; first verify we are able to write the output file - (if (not (file-read-access? infile)) - (begin - (debug:print 2 *default-log-port* "ERROR: cannot read " infile) - (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir) - #f - ) - (with-input-from-file infile read-lines) - ))) - -;; select end_time-now from -;; (select testname,item_path,event_time+run_duration as -;; end_time,strftime('%s','now') as now from tests where state in -;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); - -(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - ;; The default running-deadtime is 720 seconds = 12 minutes. - ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) - (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) - (server-start-allowance 200) - (server-overloaded-budget 200) - (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) - (launch-monitor-on-time-budget 30) - (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) - (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) - (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) - (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) - (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) - ) - (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) - (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) - - (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - (let* ((stmth1 (db:get-cache-stmth - dbdat run-id db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) - AND state IN ('RUNNING');")) - (stmth2 (db:get-cache-stmth - dbdat run-id db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) - AND state IN ('REMOTEHOSTSTART');")) - (stmth3 (db:get-cache-stmth - dbdat run-id db - "SELECT id,rundir,uname,testname,item_path FROM tests - WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 - AND state IN ('LAUNCHED');"))) - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) - (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" - test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds) - " event-time="event-time" run-duration="run-duration)))) - stmth1 - run-id running-deadtime) ;; default time 720 seconds - - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id - " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time - " run-duration="run-duration) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) - stmth2 - run-id remotehoststart-deadtime) ;; default time 230 seconds - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id - " 1 day since event_time marked") - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) - stmth3 - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " - (length toplevels) " old LAUNCHED toplevel tests and " - (length incompleted) " tests marked RUNNING but apparently dead.")) - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - ;; (launch:is-test-alive "localhost" 435) - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") - " as DEAD") - (for-each - (lambda (test-id) - (let* (;; (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id)) - (tinfo (db:get-test-info-by-id dbstruct run-id test-id)) - (run-dir (db:test-get-rundir tinfo)) - (host (db:test-get-host tinfo)) - (pid (db:test-get-process_id tinfo)) - (result (db:get-status-from-final-status-file run-dir))) - (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") - (db:set-state-status-and-roll-up-items - dbstruct run-id test-id 'foo "COMPLETED" "PASS" - "Test stopped responding but it has PASSED; marking it PASS in the DB.")) - (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. - (launch:is-test-alive host pid)))) - (if is-alive - (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host - " has a process on pid " pid ", NOT setting to DEAD.") - (begin - (debug:print 0 *default-log-port* "INFO: test " test-id - " final state/status is not COMPLETED/PASS. It is " result) - (db:set-state-status-and-roll-up-items - dbstruct run-id test-id 'foo "COMPLETED" "DEAD" - "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) - ;; call end of eud of run detection for posthook - from merge, is it needed? - ;; (launch:end-of-run-check run-id) - all-ids) - ;;call end of eud of run detection for posthook - (launch:end-of-run-check run-id) - ))))))) +;; (define (db:have-incompletes? dbstruct run-id ovr-deadtime) +;; (let* ((incompleted '()) +;; (oldlaunched '()) +;; (toplevels '()) +;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) +;; (deadtime (if (and deadtime-str +;; (string->number deadtime-str)) +;; (string->number deadtime-str) +;; 72000))) ;; twenty hours +;; (db:with-db +;; dbstruct run-id #f +;; (lambda (dbdat db) +;; (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) +;; +;; ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes +;; ;; +;; ;; HOWEVER: this code in run:test seems to work fine +;; ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) +;; ;; (db:test-get-run_duration testdat))) +;; ;; 600) +;; ;; (db:delay-if-busy dbdat) +;; (sqlite3:for-each-row +;; (lambda (test-id run-dir uname testname item-path) +;; (if (and (equal? uname "n/a") +;; (equal? item-path "")) ;; this is a toplevel test +;; ;; what to do with toplevel? call rollup? +;; (begin +;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) +;; (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) +;; (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) +;; (db:get-cache-stmth dbdat db +;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") +;; run-id deadtime) +;; +;; ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config +;; ;; +;; ;; (db:delay-if-busy dbdat) +;; (sqlite3:for-each-row +;; (lambda (test-id run-dir uname testname item-path) +;; (if (and (equal? uname "n/a") +;; (equal? item-path "")) ;; this is a toplevel test +;; ;; what to do with toplevel? call rollup? +;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) +;; (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) +;; (db:get-cache-stmth dbdat db +;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") +;; run-id) +;; +;; (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") +;; (if (and (null? incompleted) +;; (null? oldlaunched) +;; (null? toplevels)) +;; #f +;; #t))))) ;; BUG: Probably broken - does not explicitly use run-id in the query ;; (define (db:top-test-set-per-pf-counts dbstruct run-id test-name) (db:general-call dbstruct run-id 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) @@ -1317,141 +1247,56 @@ ;; (db:find-and-mark-incomplete db) ;; (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;") dead-runs)) -;;====================================================================== -;; M E T A G E T A N D S E T V A R S -;;====================================================================== - -;; returns number if string->number is successful, string otherwise -;; also updates *global-delta* -;; -(define (db:get-var dbstruct var) - (let* ((res #f)) - (db:with-db - dbstruct #f #f ;; for the moment vars are only stored in main.db - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - "SELECT val FROM metadat WHERE var=?;" var) - ;; convert to number if can - (if (string? res) - (let ((valnum (string->number res))) - (if valnum (set! res valnum)))) - res)))) - -(define (db:inc-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var)))) - -(define (db:dec-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var)))) - -;; This was part of db:get-var. It was used to estimate the load on -;; the database files. -;; -;; scale by 10, average with current value. -;; (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms) -;; (if throttle throttle 0.01))) -;; 2)) -;; (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit -;; (begin -;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) -;; (set! *last-global-delta-printed* *global-delta*))) - -(define (db:set-var dbstruct var val) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)))) - -(define (db:add-var dbstruct var val) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var)))) - -(define (db:del-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (dbdat db) - (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) - ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== -(define (db:no-sync-db db-in) - (if db-in - db-in - (if *no-sync-db* - *no-sync-db* - (begin - (mutex-lock! *db-access-mutex*) - (let ((dbpath (common:get-db-tmp-area)) - (db (dbfile:open-no-sync-db dbpath))) - (set! *no-sync-db* db) - (mutex-unlock! *db-access-mutex*) - db))))) - -(define (with-no-sync-db proc) - (let* ((db (db:no-sync-db *no-sync-db*))) - (proc db))) - +(define (db:get-dbsync-path) + (case (rmt:transport-mode) + ((http)(common:get-db-tmp-area)) + ((tcp) (conc *toppath*"/.mtdb")) + ((nfs) (conc *toppath*"/.mtdb")) + (else "/tmp/dunno-this-gonna-exist"))) + +;; This is needed for api.scm (define (db:open-no-sync-db) - (dbfile:open-no-sync-db (db:dbfile-path))) - -(define (db:no-sync-close-db db stmt-cache) - (db:safely-close-sqlite3-db db stmt-cache)) - - -;; use a global for some primitive caching, it is just silly to -;; re-read the db over and over again for the keys since they never -;; change - + (dbfile:open-no-sync-db (db:get-dbsync-path))) + ;; why get the keys from the db? why not get from the *configdat* ;; using keys:config-get-fields? (define (db:get-keys dbstruct) - (keys:config-get-fields *configdat*) -) - -;; (if *db-keys* *db-keys* -;; (let ((res '())) -;; (db:with-db dbstruct #f #f -;; (lambda (dbdat db) -;; (sqlite3:for-each-row -;; (lambda (key) -;; (set! res (cons key res))) -;; db -;; "SELECT fieldname FROM keys ORDER BY id DESC;"))) -;; (set! *db-keys* res) -;; res))) + (keys:config-get-fields *configdat*)) ;; extract index number given a header/data structure (define (db:get-index-by-header header field) (list-index (lambda (x)(equal? x field)) header)) ;; look up values in a header/data structure (define (db:get-value-by-header row header field) - (if (or (null? header) (not row)) - #f - (let loop ((hed (car header)) - (tal (cdr header)) - (n 0)) - (if (equal? hed field) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" - row " header=" header " field=" field ", exn=" exn) - #f) - (vector-ref row n)) - (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) + (let ((len (if (vector? row) + (vector-length row) + 0))) + (if (or (null? header) (not row)) + #f + (let loop ((hed (car header)) + (tal (cdr header)) + (n 0)) + (if (equal? hed field) + (handle-exceptions + exn + (begin + (debug:print 4 *default-log-port* "WARNING: attempt to read non-existant field, row=" + row " header=" header " field=" field ", exn=" exn) + #f) + (if (>= n len) + #f + (vector-ref row n))) + (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))) ;; Accessors for the header/data structure ;; get rows and header from (define (db:get-header vec)(vector-ref vec 0)) (define (db:get-rows vec)(vector-ref vec 1)) @@ -1458,33 +1303,26 @@ ;;====================================================================== ;; R U N S ;;====================================================================== - - - - (define (db:get-run-times dbstruct run-patt target-patt) (let ((res `()) - (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) -;(print qry) -(db:with-db + (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;"))) + ;(print qry) + (db:with-db dbstruct #f ;; this is for the main runs db #f ;; does not modify db (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (runname runtime target ) - (set! res (cons (vector runname runtime target) res))) - db - qry - run-patt target-patt) - - res)))) - - + (sqlite3:for-each-row + (lambda (runname runtime target ) + (set! res (cons (vector runname runtime target) res))) + db + qry + run-patt target-patt) + res)))) (define (db:get-run-name-from-id dbstruct run-id) (db:with-db dbstruct #f ;; this is for the main runs db @@ -1551,11 +1389,11 @@ ;; (debug:print 0 *default-log-port* "Got here 0.") (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (db:with-db - dbstruct #f #f + dbstruct #f #t (lambda (dbdat db) ;; (debug:print 0 *default-log-port* "Got here 1.") (let ((res #f)) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" @@ -1572,10 +1410,92 @@ res))) (begin (debug:print-error 0 *default-log-port* "Called without all necessary keys") #f)))) +(define (db:get-run-id dbstruct runname target) + (let ((runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update + (if (null? runs) + #f + (simple-run-id (car runs))))) + +;; called with run-id=#f so will operate on main.db +;; +(define (db:insert-run dbstruct target runname run-meta) + (let* ((keys (db:get-keys dbstruct)) + (runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update + ;; need to insert run based on target and runname + (let* ((targvals (string-split target "/")) + (keystr (string-intersperse keys ",")) + (key?str (string-intersperse (make-list (length targvals) "?") ",")) + (qrystr (conc "INSERT INTO runs (runname,"keystr") VALUES (?,"key?str")")) + (get-var (lambda (db qrystr) + (let* ((res #f)) + (sqlite3:for-each-row + (lambda row + (set res (car row))) + db qrystr runname) + res)))) + (if (null? runs) + (db:create-initial-run-record dbstruct runname target)) + (let* ((run-id (db:get-run-id dbstruct runname target))) + (db:with-db + dbstruct + #f #t + (lambda (dbdat db) + (for-each + (lambda (keyval) + (let* ((fieldname (car keyval)) + (getqry (conc "SELECT "fieldname" FROM runs WHERE id=?;")) + (setqry (conc "UPDATE runs SET "fieldname"=? WHERE id=?;")) + (val (cdr keyval)) + (valnum (if (number? val) + val + (if (string? val) + (string->number val) + #f)))) + (if (not (member fieldname (cons "runname" keys))) ;; don't attempt to tweak these + (let* ((curr-val (get-var db getqry)) + (have-it (or (equal? curr-val val) + (equal? curr-val valnum)))) + (if (not have-it) + (sqlite3:execute db setqry (or valnum val) run-id)))))) + run-meta))) + run-id)))) + +(define (db:create-initial-run-record dbstruct runname target) + (let* ((keys (db:get-keys dbstruct)) + (targvals (string-split target "/")) + (keystr (string-intersperse keys ",")) + (key?str (string-intersperse (make-list (length targvals) "?") ",")) + (qrystr (conc "INSERT INTO runs (runname,"keystr") VALUES (?,"key?str")"))) + (db:with-db + dbstruct #f #t + (lambda (dbdat db) + (apply sqlite3:execute db qrystr runname targvals))))) + +(define (db:insert-test dbstruct run-id test-rec) + (let* ((testname (alist-ref "testname" test-rec equal?)) + (item-path (alist-ref "item_path" test-rec equal?)) + (id (db:get-test-id dbstruct run-id testname item-path)) + (fieldvals (filter (lambda (x)(not (member (car x) '("id" "last_update")))) test-rec)) + (setqry (conc "UPDATE tests SET "(string-intersperse + (map (lambda (dat) + (conc (car dat)"=?")) + fieldvals) + ",")" WHERE id=?;")) + (insqry (conc "INSERT INTO tests ("(string-intersperse (map (lambda (x) (car x)) fieldvals) ",") + ") VALUES ("(string-intersperse (make-list (length fieldvals) "?") ",")");"))) + (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry) + (db:with-db + dbstruct + run-id #t + (lambda (dbdat db) + (if id + (apply sqlite3:execute db setqry (append (map cdr fieldvals) (list id))) + (apply sqlite3:execute db insqry (map cdr fieldvals))))))) + ;; replace header and keystr with a call to runs:get-std-run-fields ;; ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; runpatts: patt1,patt2 ... ;; @@ -1615,17 +1535,13 @@ qrystr ))) (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) (vector header res))) - -(define-record simple-run target id runname state status owner event_time) -(define-record-printer (simple-run x out) - (fprintf out "#,(simple-run ~S ~S ~S ~S)" - (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) - ;; simple get-runs +;; +;; records used defined in dbfile ;; (define (db:simple-get-runs dbstruct runpatt count offset target last-update) (let* ((res '()) (keys (db:get-keys dbstruct)) (runpattstr (db:patt->like "runname" runpatt)) @@ -1658,23 +1574,23 @@ qrystr ))) (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count) res)) -;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db) +;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ??? ;; ;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the .db!! (define (db:get-changed-run-ids since-time) (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir")) - (alldbs (glob (conc dbdir "/.megatest/[0-9]*.db"))) + (alldbs (glob (conc dbdir "/.mtdb/[0-9]*.db*"))) (changed (filter (lambda (dbfile) (> (file-modification-time dbfile) since-time)) alldbs))) (delete-duplicates (map (lambda (dbfile) - (let* ((res (string-match ".*\\/(\\d\\d)\\.db" dbfile))) + (let* ((res (string-match ".*\\/(\\d\\d)\\.db*" dbfile))) (if res (string->number (cadr res)) (begin (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id") 0)))) @@ -1778,16 +1694,15 @@ ;; Update run_stats for given run_id ;; input data is a list (state status count) ;; (define (db:update-run-stats dbstruct run-id stats) - ;; (mutex-lock! *db-transaction-mutex*) + (mutex-lock! *db-transaction-mutex*) (db:with-db dbstruct #f - #f - + #t (lambda (dbdat db) ;; remove previous data (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) @@ -1800,11 +1715,11 @@ (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) (apply sqlite3:execute stmt2 run-id dat)) stats))))) (sqlite3:finalize! stmt1) (sqlite3:finalize! stmt2) - ;; (mutex-unlock! *db-transaction-mutex*) + (mutex-unlock! *db-transaction-mutex*) res)))) (define (db:get-main-run-stats dbstruct run-id) (db:with-db dbstruct @@ -1942,19 +1857,20 @@ (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) (vector header (reverse - (db:with-db dbstruct #f #f ;; reads db, does not write to it. - (lambda (dbdat db) - (sqlite3:fold-row - (lambda (res . r) - (cons (list->vector r) res)) - '() - db - qry-str - runnamepatt))))))) + (db:with-db + dbstruct #f #f ;; reads db, does not write to it. + (lambda (dbdat db) + (sqlite3:fold-row + (lambda (res . r) + (cons (list->vector r) res)) + '() + db + qry-str + runnamepatt))))))) ;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) ;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector ;; this is inconsistent with get-runs but it makes some sense. ;; @@ -1991,11 +1907,11 @@ run-id)))) ;; does not (obviously!) removed dependent data. But why not!!? (define (db:delete-run dbstruct run-id) (db:with-db - dbstruct #f #f + dbstruct #f #t (lambda (dbdat db) (sqlite3:with-transaction db (lambda () (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) @@ -2023,34 +1939,38 @@ user (conc newlockval " " run-id)) (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) (define (db:set-run-status dbstruct run-id status msg) (db:with-db - dbstruct #f #f + dbstruct #f #t (lambda (dbdat db) (if msg (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) +(define (db:set-run-state-status-db dbdat db run-id state status ) + (sqlite3:execute + (db:get-cache-stmth + dbdat db "UPDATE runs SET status=?,state=? WHERE id=?;") status state run-id)) + (define (db:set-run-state-status dbstruct run-id state status ) (db:with-db - dbstruct #f #f + dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) - - - + (db:set-run-state-status-db dbdat db run-id state status)))) + (define (db:get-run-status dbstruct run-id) (let ((res "n/a")) (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (status) (set! res status)) - db - "SELECT status FROM runs WHERE id=?;" + (db:get-cache-stmth + dbdat db + "SELECT status FROM runs WHERE id=?;" ) run-id) res)))) (define (db:get-run-state dbstruct run-id) (let ((res "n/a")) @@ -2058,12 +1978,27 @@ dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (status) (set! res status)) - db - "SELECT state FROM runs WHERE id=?;" + (db:get-cache-stmth + dbdat db + "SELECT state FROM runs WHERE id=?;" ) + run-id) + res)))) + +(define (db:get-run-state-status dbstruct run-id) + (let ((res (cons "n/a" "n/a"))) + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (state status) + (set! res (cons state status))) + (db:get-cache-stmth + dbdat db + "SELECT state,status FROM runs WHERE id=?;" ) run-id) res)))) ;;====================================================================== @@ -2306,12 +2241,12 @@ (sqlite3:for-each-row (lambda (run-id testname item-path state status) ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) db - "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" - test-id))) + "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" + test-id run-id))) res)) ;; get a useful subset of the tests data (used in dashboard ;; use db:mintest-get-{id ,run_id,testname ...} ;; @@ -2338,11 +2273,11 @@ (define (db:delete-test-records dbstruct run-id test-id) (db:general-call dbstruct run-id 'delete-test-step-records (list test-id)) (db:general-call dbstruct run-id 'delete-test-data-records (list test-id)) (db:with-db - dbstruct run-id #f + dbstruct run-id #t (lambda (dbdat db) (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) ;; (define (db:delete-old-deleted-test-records dbstruct) @@ -2402,25 +2337,28 @@ ;; NOTE: run-id is not used ;; ;; (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) (db:with-db dbstruct - run-id - #t + run-id #t (lambda (dbdat db) - (cond - ((and newstate newstatus newcomment) - (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) - test-id)) - ((and newstate newstatus) - (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) - (else - (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) - (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) - (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) - test-id)))))) - (mt:process-triggers dbstruct run-id test-id newstate newstatus)) + (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment)))) + +(define (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment) + (cond + ((and newstate newstatus newcomment) + (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) + test-id)) + ((and newstate newstatus) + (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) + (else + (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) + (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) + (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) + test-id)))) + ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function + ) ;; NEW BEHAVIOR: Count tests running in all runs! ;; (define (db:get-count-tests-running dbstruct run-id) ;; fastmode) (let* ((qry ;; (if fastmode @@ -2429,11 +2367,11 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat run-id db qry))) + (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth)))))) ;; NEW BEHAVIOR: Count tests running in only one run! ;; (define (db:get-count-tests-actually-running dbstruct run-id) @@ -2459,11 +2397,11 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat run-id db qry))) + (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth run-id)))))) ;; For a given testname how many items are running? Used to determine ;; probability for regenerating html ;; @@ -2472,11 +2410,11 @@ dbstruct run-id #f (lambda (dbdat db) (let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;") - (stmth (db:get-cache-stmth dbdat run-id db stmt))) + (stmth (db:get-cache-stmth dbdat db stmt))) (sqlite3:first-result stmth run-id testname))))) (define (db:get-not-completed-cnt dbstruct run-id) (db:with-db @@ -2552,11 +2490,11 @@ ;; (define (db:test-set-top-process-pid dbstruct run-id test-id pid) (db:with-db dbstruct run-id - #f + #t (lambda (dbdat db) (sqlite3:execute db "UPDATE tests SET attemptnum=? WHERE id=?;" pid test-id)))) (define (db:test-get-top-process-pid dbstruct run-id test-id) @@ -2565,13 +2503,13 @@ run-id #f (lambda (dbdat db) (db:first-result-default db - "SELECT attemptnum FROM tests WHERE id=?;" + "SELECT attemptnum FROM tests WHERE id=? AND run_id=?;" #f - test-id)))) + test-id run-id)))) (define db:test-record-fields '("id" "run_id" "testname" "state" "status" "event_time" "host" "cpuload" "diskfree" "uname" "rundir" "item_path" "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" "last_update")) @@ -2591,11 +2529,11 @@ (define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) (define (db:update-tesdata-on-repilcate-db dbstruct old-lt new-lt) (db:with-db - dbstruct #f #f + dbstruct #f #t (lambda (dbdat db) (sqlite3:execute db "UPDATE tests SET rundir= replace(rundir,?,?), shortdir=replace(shortdir,?,?);" old-lt new-lt old-lt new-lt)))) ;; NOTE: Use db:test-get* to access records @@ -2672,11 +2610,11 @@ (lambda (run-id) (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) (db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs))) run-ids))) -;; Get test data using test_id, run-id is not used +;; Get test data using test_id ;; (define (db:get-test-info-by-id dbstruct run-id test-id) (db:with-db dbstruct run-id @@ -2686,12 +2624,32 @@ (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") - test-id) + ;; (db:get-cache-stmth dbdat db + ;; (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=? AND run_id=?;")) + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=? AND run_id=?;") + test-id run-id) + res)))) + +;; Get test state, status using test_id +;; +(define (db:get-test-state-status-by-id dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (dbdat db) + (let ((res (cons #f #f))) +;; (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=?;"))) + (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test + (lambda (state status) + (cons state status)) + db + "SELECT state,status FROM tests WHERE id=? AND run_id=?;" ;; stmth try not compiling this one - yes, this fixed the bind issue + test-id run-id) res)))) ;; Use db:test-get* to access ;; Get test data using test_ids. NB// Only works within a single run!! ;; @@ -2709,36 +2667,54 @@ db (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" (string-intersperse (map conc test-ids) ",") ");")) res)))) +;; try every second until tries times proc +;; +(define (db:keep-trying-until-true proc params tries) + (let* ((res (apply proc params))) + (if res + res + (if (> tries 0) + (begin + (thread-sleep! 1) + (db:keep-trying-until-true proc params (- tries 1))) + (begin + ;; (debug:print-info 0 *default-log-port* "proc never returned true, params="params) + (print"db:keep-trying-until-true proc never returned true, proc = " proc " params =" params " tries = " tries) + #f))))) + (define (db:get-test-info dbstruct run-id test-name item-path) (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (apply vector a b))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") - test-name item-path run-id) - res)))) + (db:get-test-info-db db run-id test-name item-path)))) + +(define (db:get-test-info-db db run-id test-name item-path) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (apply vector a b))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") + test-name item-path run-id) + res)) (define (db:test-get-rundir-from-test-id dbstruct run-id test-id) (db:with-db dbstruct run-id #f (lambda (dbdat db) (db:first-result-default db - "SELECT rundir FROM tests WHERE id=?;" + "SELECT rundir FROM tests WHERE id=? AND run_id=?;" #f ;; default result - test-id)))) + test-id run-id)))) (define (db:get-test-times dbstruct run-name target) (let ((res `()) (qry (conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") @@ -2843,11 +2819,11 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat #f db stmt)) + (let* ((stmth (db:get-cache-stmth dbdat db stmt)) (res (sqlite3:fold-row (lambda (res id test-id category variable value expected tol units comment status type last-update) (vector id test-id category variable value expected tol units comment status type last-update)) (vector #f #f #f #f #f #f #f #f #f #f #f #f) stmth @@ -2861,11 +2837,11 @@ ;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored (define (db:test-data-rollup dbstruct run-id test-id status) (let* ((fail-count 0) (pass-count 0)) (db:with-db - dbstruct run-id #f + dbstruct run-id #t (lambda (dbdat db) (sqlite3:for-each-row (lambda (fcount pcount) (set! fail-count fcount) (set! pass-count pcount)) @@ -2960,11 +2936,11 @@ ;; EOF (define (db:csv->test-data dbstruct run-id test-id csvdata) (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) (db:with-db - dbstruct #f #f + dbstruct #f #t (lambda (dbdat db) (let* ((csvlist (csv->list (make-csv-reader (open-input-string csvdata) '((strip-leading-whitespace? #t) (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) @@ -3168,163 +3144,171 @@ (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) ;; establish info on incoming test followed by info on top level test ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met (let* ((testdat (if (number? test-name) (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id - (db:get-test-info dbstruct run-id test-name item-path))) + (db:keep-trying-until-true ;; in our threaded stuff this call could happen before the test is registered (maybe?) + db:get-test-info + (list dbstruct run-id test-name item-path) + 10))) (test-id (db:test-get-id testdat)) (test-name (if (number? test-name) (db:test-get-testname testdat) test-name)) (item-path (db:test-get-item-path testdat)) (tl-testdat (db:get-test-info dbstruct run-id test-name "")) (tl-test-id (if tl-testdat (db:test-get-id tl-testdat) - #f))) + #f)) + (new-state-eh #f) + (new-status-eh #f)) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (db:general-call dbstruct run-id 'set-test-start-time (list test-id))) (mutex-lock! *db-transaction-mutex*) (db:with-db - dbstruct run-id #f + dbstruct run-id #t (lambda (dbdat db) (let ((tr-res (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status + (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test - (state-statuses (db:roll-up-rules state-status-counts state status)) - (newstate (car state-statuses)) - (newstatus (cadr state-statuses))) + (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (state-statuses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-statuses)) + (newstatus (cadr state-statuses))) + (set! new-state-eh newstate) + (set! new-status-eh newstatus) (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " - (apply conc - (map (lambda (x) - (conc - (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) - state-status-counts))); end debug:print - - (if tl-test-id - (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + (if tl-test-id + (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct )))))) (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) + (if new-state-eh ;; moved from db:test-set-state-status + (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh)) tr-res))))) (define (db:roll-up-rules state-status-counts state status) - (let* ((running (length (filter (lambda (x) - (member (dbr:counts-state x) *common:running-states*)) - state-status-counts))) - (bad-not-started (length (filter (lambda (x) - (and (equal? (dbr:counts-state x) "NOT_STARTED") - (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) - state-status-counts))) - (all-curr-states (common:special-sort ;; worst -> best (sort of) - (delete-duplicates - (if (and state (not (member state *common:dont-roll-up-states*))) - (cons state (map dbr:counts-state state-status-counts)) - (map dbr:counts-state state-status-counts))) - *common:std-states* >)) - (all-curr-statuses (common:special-sort ;; worst -> best - (delete-duplicates - (if (and state status (not (member state *common:dont-roll-up-states*))) - (cons status (map dbr:counts-status state-status-counts)) - (map dbr:counts-status state-status-counts))) - *common:std-statuses* >)) - (non-completes (filter (lambda (x) - (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) - all-curr-states)) - (preq-fails (filter (lambda (x) - (equal? x "PREQ_FAIL")) - all-curr-statuses)) - (num-non-completes (length non-completes)) - (newstate (cond - ((> running 0) "RUNNING") ;; anything running, call the situation running - ((> (length preq-fails) 0) "NOT_STARTED") - ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. - ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED - (else (car all-curr-states)))) - (newstatus (cond - ((> (length preq-fails) 0) "PREQ_FAIL") - ((or (> bad-not-started 0) - (and (equal? newstate "NOT_STARTED") - (> num-non-completes 0))) - "STARTED") - (else (car all-curr-statuses))))) - (debug:print-info 2 *default-log-port* - "\n--> probe db:set-state-status-and-roll-up-items: " - "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) - "\n--> running: "running - "\n--> bad-not-started: "bad-not-started - "\n--> non-non-completes: "num-non-completes - "\n--> non-completes: "non-completes - "\n--> all-curr-states: "all-curr-states - "\n--> all-curr-statuses: "all-curr-statuses - "\n--> newstate "newstate - "\n--> newstatus "newstatus - "\n\n") - - ;; NB// Pass the db so it is part of the transaction - (list newstate newstatus))) + (if (null? state-status-counts) + '(#f #f) + (let* ((running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (if (and state (not (member state *common:dont-roll-up-states*))) + (cons state (map dbr:counts-state state-status-counts)) + (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (if (and state status (not (member state *common:dont-roll-up-states*))) + (cons status (map dbr:counts-status state-status-counts)) + (map dbr:counts-status state-status-counts))) + *common:std-statuses* >)) + (non-completes (filter (lambda (x) + (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) + all-curr-states)) + (preq-fails (filter (lambda (x) + (equal? x "PREQ_FAIL")) + all-curr-statuses)) + (num-non-completes (length non-completes)) + (newstate (cond + ((> running 0) "RUNNING") ;; anything running, call the situation running + ((> (length preq-fails) 0) "NOT_STARTED") + ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. + ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED + (else (car all-curr-states)))) + (newstatus (cond + ((> (length preq-fails) 0) "PREQ_FAIL") + ((or (> bad-not-started 0) + (and (equal? newstate "NOT_STARTED") + (> num-non-completes 0))) + "STARTED") + (else (car all-curr-statuses))))) + (debug:print-info 2 *default-log-port* + "\n--> probe db:set-state-status-and-roll-up-items: " + "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) + "\n--> running: "running + "\n--> bad-not-started: "bad-not-started + "\n--> non-non-completes: "num-non-completes + "\n--> non-completes: "non-completes + "\n--> all-curr-states: "all-curr-states + "\n--> all-curr-statuses: "all-curr-statuses + "\n--> newstate "newstate + "\n--> newstatus "newstatus + "\n\n") + + ;; NB// Pass the db so it is part of the transaction + (list newstate newstatus)))) (define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) (mutex-lock! *db-transaction-mutex*) (db:with-db - dbstruct run-id #f + dbstruct run-id #t (lambda (dbdat db) (let ((tr-res (sqlite3:with-transaction db (lambda () - (let* ((state-status-counts (db:get-all-state-status-counts-for-run db run-id)) + (let* ((state-status-counts (db:get-all-state-status-counts-for-run-db dbdat db run-id)) (state-statuses (db:roll-up-rules state-status-counts #f #f )) (newstate (car state-statuses)) (newstatus (cadr state-statuses))) (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) - (db:set-run-state-status db run-id newstate newstatus ))))))) + (db:set-run-state-status-db dbdat db run-id newstate newstatus ))))))) (mutex-unlock! *db-transaction-mutex*) tr-res)))) + +(define (db:get-all-state-status-counts-for-run-db dbdat db run-id) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + (db:get-cache-stmth + dbdat db + "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;") + run-id )) (define (db:get-all-state-status-counts-for-run dbstruct run-id) - (let* ((test-count-recs (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" - run-id ))))) - test-count-recs)) - + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:get-all-state-status-counts-for-run-db dbdat db run-id)))) ;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* ;; ;; NOTE: This is called within a transaction ;; -(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) - (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) +(define (db:get-all-state-status-counts-for-test db run-id test-name item-path item-state-in item-status-in) + (let* ((test-info (db:get-test-info-db db run-id test-name item-path)) (item-state (or item-state-in (db:test-get-state test-info))) (item-status (or item-status-in (db:test-get-status test-info))) - (other-items-count-recs (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - ;; ignore current item because we have changed its value in the current transation so this select will see the old value. - "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" - run-id test-name item-path)))) - + (other-items-count-recs (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + ;; ignore current item because we have changed its value in the current transation so this select will see the old value. + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + run-id test-name item-path)) ;; add current item to tally outside of sql query - (match-countrec-lambda (lambda (countrec) - (and (equal? (dbr:counts-state countrec) item-state) + (match-countrec-lambda (lambda (countrec) + (and (equal? (dbr:counts-state countrec) item-state) (equal? (dbr:counts-status countrec) item-status)))) - + (already-have-count-rec-list (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status (updated-count-rec (if (null? already-have-count-rec-list) (make-dbr:counts state: item-state status: item-status count: 1) @@ -3334,11 +3318,10 @@ (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) (unrelated-rec-list (filter nonmatch-countrec-lambda other-items-count-recs))) - (cons updated-count-rec unrelated-rec-list))) ;; (define (db:get-all-item-states db run-id test-name) ;; (sqlite3:map-row ;; (lambda (a) a) @@ -3542,19 +3525,21 @@ (else (hash-table-set! *logged-in-clients* client-signature (current-seconds)) '(#t "successful login")))) +;; NO WAY TO KNOW IF IT MODIFIES THE DB BUT NEARLY ALL ARE UPDATES/INSERTS +;; (define (db:general-call dbstruct run-id stmtname params) ;; Why is db:lookup-query above not used here to get the query? (let ((query (let ((q (alist-ref (if (string? stmtname) (string->symbol stmtname) stmtname) db:queries))) (if q (car q) #f)))) (db:with-db - dbstruct run-id #f + dbstruct run-id #t (lambda (dbdat db) (apply sqlite3:execute db query params) #t)))) ;; get a summary of state and status counts to calculate a rollup @@ -3762,19 +3747,19 @@ testname) res)))) ;; create a new record for a given testname (define (db:testmeta-add-record dbstruct testname) - (db:with-db dbstruct #f #f + (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute db "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) ;; update one of the testmeta fields (define (db:testmeta-update-field dbstruct testname field value) - (db:with-db dbstruct #f #f + (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute db (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) @@ -4032,12 +4017,10 @@ ;; To sync individual run ;;====================================================================== (define (db:get-run-record-ids dbstruct target run keynames test-patt) (let* ((backcons (lambda (lst item)(cons item lst))) (all_tests '()) - (all_test_steps '()) - (all_test_data '()) (keystr (string-intersperse (map (lambda (key val) (conc key " like '" val "'")) keynames (string-split target "/")) @@ -4063,39 +4046,15 @@ ) ) ) all_tests ) ) - (set! all_test_steps - (append - (map (lambda (x) (cons x run_id)) - (db:with-db dbstruct run_id #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_steps WHERE test_id in (" test-qry ")")) - ) - ) - ) all_test_steps - ) - ) - (set! all_test_data - (append - (map (lambda (x) (cons x run_id)) - (db:with-db dbstruct run_id #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_data WHERE test_id in (" test-qry ")")) - ) - ) - ) all_test_data - ) - ) ) run_ids ) `((runs . ,run_ids) (tests . ,all_tests) - (test_steps . ,all_test_steps) - (test_data . ,all_test_data) ) ) ) @@ -4103,17 +4062,27 @@ ;; Just for sync, procedures to make sync easy ;;====================================================================== ;; get an alist of run ids and test/run, test_step/run pairs changed since time since-time ;; '((runs . (1 2 3 ...))(tests . ((5 . 1) (6 . 3) (6 . 2) (7 . 1) ... + +;; Retrieves record IDs from the database based on the timestamp of their last update. + +;; The function takes two arguments: dbstruct, which represents the database structure, and since-time, which is a timestamp indicating the time of the last update. +;; The function first defines a few helper functions, including backcons, which takes a list and an item and adds the item to the front of the list. +;; It then initializes several variables to empty lists: all_tests, all_test_steps, all_test_data, all_run_ids, and all_test_ids. +;; The function then retrieves a list of IDs for runs that have been changed since since-time using the db:get-changed-run-ids function. +;; It then filters the full list of run IDs to only include those that match the changed run IDs based on their modulo 100. +;; For each changed run ID, the function retrieves a list of test IDs, test step IDs, and test data IDs that have been updated since since-time. +;; It appends these IDs to the appropriate lists (all_tests, all_test_steps, and all_test_data) using the append and map functions. +;; The function then retrieves a list of run stat IDs that have been updated since since-time. +;; Finally, the function returns a list of associations between record types and their corresponding IDs: runs, tests, test_steps, test_data, and run_stats. ;; (define (db:get-changed-record-ids dbstruct since-time) ;; no transaction, allow the db to be accessed between the big queries (let* ((backcons (lambda (lst item)(cons item lst))) (all_tests '()) - (all_test_steps '()) - (all_test_data '()) (changed_run_dbs (db:get-changed-run-ids since-time)) ;; gets the rundb numbers (all_run_ids (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:fold-row backcons '() db "SELECT id FROM runs")) @@ -4124,16 +4093,10 @@ (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) ) ) - (run_stat_ids - (db:with-db dbstruct #f #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM run_stats WHERE last_update>=?" since-time)) - ) - ) ) (for-each (lambda (run_id) (set! all_tests (append @@ -4144,43 +4107,18 @@ ) ) ) all_tests ) ) - (set! all_test_steps - (append - (map (lambda (x) (cons x run_id)) - (db:with-db dbstruct run_id #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM test_steps WHERE last_update>=?" since-time) - ) - ) - ) all_test_steps - ) - ) - (set! all_test_data - (append - (map (lambda (x) (cons x run_id)) - (db:with-db dbstruct run_id #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db "SELECT id FROM test_data WHERE last_update>=?" since-time) - ) - ) - ) all_test_data - ) - ) ) changed_run_ids ) (debug:print 2 *default-log-port* "run_ids = " run_ids) (debug:print 2 *default-log-port* "all_tests = " all_tests) `((runs . ,run_ids) (tests . ,all_tests) - (test_steps . ,all_test_steps) - (test_data . ,all_test_data) - (run_stats . ,run_stat_ids) ) ) ) ;;====================================================================== @@ -4370,17 +4308,17 @@ ;; sync for filesystem local db writes ;; (define (db:run-lock-and-sync no-sync-db) (let* ((tmp-area (common:get-db-tmp-area)) - (dbfiles (glob (conc tmp-area"/.megatest/*.db"))) + (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) (sync-durations (make-hash-table))) ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) (for-each (lambda (file) (let* ((fname (conc (pathname-file file) ".db")) - (fulln (conc *toppath*"/.megatest/"fname)) + (fulln (conc *toppath*"/.mtdb/"fname)) (time1 (if (file-exists? file) (file-modification-time file) (begin (debug:print-info 0 *default-log-port* "Sync - I do not see file "file) 1))) @@ -4427,11 +4365,10 @@ (last-time (current-seconds)) ;; last time through the sync loop (no-sync-db (db:open-no-sync-db)) (sync-duration 0) ;; run time of the sync in milliseconds (tmp-area (common:get-db-tmp-area))) ;; Sync moved to http-transport keep-running loop - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area) (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));; " this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (begin @@ -4470,11 +4407,10 @@ (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) (stmt-cache #f) ;; (dbr:dbstruct-stmt-cache dbstruct)) (sync-duration 0) ;; run time of the sync in milliseconds (subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (begin @@ -4612,15 +4548,20 @@ (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) + (if (and *no-sync-db* + (sqlite3:database? *no-sync-db*)) + (sqlite3:finalize! *no-sync-db* #t)) (if (and (not (args:get-arg "-server")) - *runremote*) + *runremote* + (eq? (rmt:transport-mode) 'http)) (begin (debug:print-info 0 *default-log-port* "Closing all client connections...") - (http-client#close-all-connections!))) + (http-transport:close-connections *runremote*) + #;(http-client#close-all-connections!))) ;; (if (and *runremote* ;; (remote-conndat *runremote*)) ;; (begin ;; (http-client#close-all-connections!))) ;; for http-client (if (not (eq? *default-log-port* (current-error-port))) Index: db_records.scm ================================================================== --- db_records.scm +++ db_records.scm @@ -16,76 +16,10 @@ ;; along with Megatest. If not, see . ;;====================================================================== ;; dbstruct ;;====================================================================== - -;; -;; -path-|-megatest.db -;; |-db-|-main.db -;; |-monitor.db -;; |-sdb.db -;; |-fdb.db -;; |-1.db -;; |-.db -;; -;; -;; Accessors for a dbstruct -;; - -;; (define-inline (dbr:dbstruct-main vec) (vector-ref vec 0)) ;; ( db path ) -;; (define-inline (dbr:dbstruct-strdb vec) (vector-ref vec 1)) ;; ( db path ) -;; (define-inline (dbr:dbstruct-path vec) (vector-ref vec 2)) -;; (define-inline (dbr:dbstruct-local vec) (vector-ref vec 3)) -;; (define-inline (dbr:dbstruct-rundb vec) (vector-ref vec 4)) ;; ( db path ) -;; (define-inline (dbr:dbstruct-inmem vec) (vector-ref vec 5)) ;; ( db #f ) -;; (define-inline (dbr:dbstruct-mtime vec) (vector-ref vec 6)) -;; (define-inline (dbr:dbstruct-rtime vec) (vector-ref vec 7)) -;; (define-inline (dbr:dbstruct-stime vec) (vector-ref vec 8)) -;; (define-inline (dbr:dbstruct-inuse vec) (vector-ref vec 9)) -;; (define-inline (dbr:dbstruct-refdb vec) (vector-ref vec 10)) ;; ( db path ) -;; (define-inline (dbr:dbstruct-locdbs vec) (vector-ref vec 11)) -;; (define-inline (dbr:dbstruct-olddb vec) (vector-ref vec 12)) ;; ( db path ) -;; ;; (define-inline (dbr:dbstruct-main-path vec) (vector-ref vec 13)) -;; ;; (define-inline (dbr:dbstruct-rundb-path vec) (vector-ref vec 14)) -;; ;; (define-inline (dbr:dbstruct-run-id vec) (vector-ref vec 13)) -;; -;; (define-inline (dbr:dbstruct-main-set! vec val)(vector-set! vec 0 val)) -;; (define-inline (dbr:dbstruct-strdb-set! vec val)(vector-set! vec 1 val)) -;; (define-inline (dbr:dbstruct-path-set! vec val)(vector-set! vec 2 val)) -;; (define-inline (dbr:dbstruct-local-set! vec val)(vector-set! vec 3 val)) -;; (define-inline (dbr:dbstruct-rundb-set! vec val)(vector-set! vec 4 val)) -;; (define-inline (dbr:dbstruct-inmem-set! vec val)(vector-set! vec 5 val)) -;; (define-inline (dbr:dbstruct-mtime-set! vec val)(vector-set! vec 6 val)) -;; (define-inline (dbr:dbstruct-rtime-set! vec val)(vector-set! vec 7 val)) -;; (define-inline (dbr:dbstruct-stime-set! vec val)(vector-set! vec 8 val)) -;; (define-inline (dbr:dbstruct-inuse-set! vec val)(vector-set! vec 9 val)) -;; (define-inline (dbr:dbstruct-refdb-set! vec val)(vector-set! vec 10 val)) -;; (define-inline (dbr:dbstruct-locdbs-set! vec val)(vector-set! vec 11 val)) -;; (define-inline (dbr:dbstruct-olddb-set! vec val)(vector-set! vec 12 val)) -;; (define-inline (dbr:dbstruct-main-path-set! vec val)(vector-set! vec 13 val)) -;; (define-inline (dbr:dbstruct-rundb-path-set! vec val)(vector-set! vec 14 val)) -;; -; (define-inline (dbr:dbstruct-run-id-set! vec val)(vector-set! vec 13 val)) - -;; constructor for dbstruct -;; -;; (define (make-dbr:dbstruct #!key (path #f)(local #f)) -;; (let ((v (make-vector 15 #f))) -;; (dbr:dbstruct-path-set! v path) -;; (dbr:dbstruct-local-set! v local) -;; (dbr:dbstruct-locdbs-set! v (make-hash-table)) -;; v)) - -;; Returns the database for a particular run-id fron the dbstruct:localdbs -;; -(define (dbr:dbstruct-localdb v run-id) - (hash-table-ref/default (dbr:dbstruct-locdbs v) run-id #f)) - -(define (dbr:dbstruct-localdb-set! v run-id db) - (hash-table-set! (dbr:dbstruct-locdbs v) run-id db)) - (define (make-db:test)(make-vector 20)) (define-inline (db:test-get-id vec) (vector-ref vec 0)) (define-inline (db:test-get-run_id vec) (vector-ref vec 1)) (define-inline (db:test-get-testname vec) (vector-ref vec 2)) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -15,35 +15,56 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== + +(use srfi-18) (declare (unit dbfile)) -;; (declare (uses debugprint)) +(declare (uses debugprint)) (declare (uses commonmod)) (module dbfile * (import scheme chicken data-structures extras - matchable) - -(import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 srfi-1 - srfi-69 - stack - files - ports - - commonmod - ) - -;; (import debugprint) + matchable + + (prefix sqlite3 sqlite3:) + posix typed-records + + srfi-18 + srfi-1 + srfi-69 + stack + files + ports + + commonmod + debugprint + ) + +;; parameters +;; +(define dbfile:testsuite-name (make-parameter #f)) + +(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic +(define num-run-dbs (make-parameter 10)) ;; number of db's in .mtdb +(define dbfile:sync-method (make-parameter 'attach)) ;; 'attach or 'original +(define dbfile:cache-method (make-parameter 'cachedb)) ;; 'direct +(define dbcache-mode (make-parameter 'tmp)) ;; 'cachedb, 'tmp (changes what open cachedb routine does) + + +;; 'original - use old condition code +;; 'suicide-mode - create mtrah/stop-the-train with info on what went wrong +;; else use no condition code (should be production mode) +;; +(define no-condition-db-with-db (make-parameter 'suicide-mode)) ;;====================================================================== ;; R E C O R D S ;;====================================================================== @@ -54,21 +75,32 @@ (areapath #f) (homehost #f) (tmppath #f) (read-only #f) (subdbs (make-hash-table)) + ;; + ;; for the cachedb approach (see dbmod.scm) + ;; this is one db per server + (cachedb #f) ;; handle for the in memory copy + (dbfile #f) ;; path to the db file on disk + (dbfname #f) ;; short name of db file on disk (used to validate accessing correct db) + (ondiskdb #f) ;; handle for the on-disk file + (dbtmpname #f) ;; path to db file in /tmp (non-imem method) + (dbdat #f) ;; create a dbdat for the downstream calls such as db:with-db + (last-update 0) + (sync-proc #f) ) ;; NOTE: Need one dbr:subdb per main.db, 1.db ... ;; (defstruct dbr:subdb - (dbname #f) ;; .megatest/1.db - (mtdbfile #f) ;; mtrah/.megatest/1.db + (dbname #f) ;; .mtdb/1.db + (mtdbfile #f) ;; mtrah/.mtdb/1.db (mtdbdat #f) ;; only need one of these for syncing ;; (dbdats (make-hash-table)) ;; id => dbdat - (tmpdbfile #f) ;; /tmp/.../.megatest/1.db - ;; (refndbfile #f) ;; /tmp/.../.megatest/1.db_ref + (tmpdbfile #f) ;; /tmp/.../.mtdb/1.db + ;; (refndbfile #f) ;; /tmp/.../.mtdb/1.db_ref (dbstack (make-stack)) ;; stack for tmp dbr:dbdat, (homehost #f) ;; not used yet (on-homehost #f) ;; not used yet (read-only #f) (last-sync 0) @@ -81,10 +113,16 @@ (dbh #f) (stmt-cache (make-hash-table)) (read-only #f) (birth-sec (current-seconds))) +;; used in simple-get-runs (thanks Brandon!) +(define-record simple-run target id runname state status owner event_time) +(define-record-printer (simple-run x out) + (fprintf out "#,(simple-run ~S ~S ~S ~S)" + (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) + (define *dbstruct-dbs* #f) (define *db-open-mutex* (make-mutex)) (define *db-access-mutex* (make-mutex)) ;; used in common.scm (define *no-sync-db* #f) (define *db-sync-in-progress* #f) @@ -92,10 +130,13 @@ (define *max-api-process-requests* 0) (define *api-process-request-count* 0) (define *db-write-access* #t) (define *db-last-sync* 0) ;; last time the sync to megatest.db happened (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* +(define *db-last-access* (current-seconds)) + +(define *db-transaction-mutex* (make-mutex)) (define (db:generic-error-printout exn . message) (print-call-chain (current-error-port)) (apply dbfile:print-err message) (dbfile:print-err @@ -157,51 +198,37 @@ ) #f ) ) -;; ;; set up a single db (e.g. main.db, 1.db ... etc.) -;; ;; -;; (define (db:setup-db dbstruct areapath run-id) -;; (let* ((dbname (db:run-id->dbname run-id)) -;; (dbstruct (hash-table-ref/default dbstructs dbname #f))) -;; (if dbstruct -;; dbstruct -;; (let* ((dbstruct-new (make-dbr:dbstruct))) -;; (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t) -;; (hash-table-set! dbstructs dbname dbstruct-new) -;; dbstruct-new)))) - -;; ; Returns the dbdat for a particular dbfile inside the area -;; ;; -;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile) -;; (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f)) -;; -;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db) -;; (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db)) -;; -;; (define (db:run-id->first-num run-id) -;; (let* ((s (number->string run-id)) -;; (l (string-length s))) -;; (substring s (- l 1) l))) - -;; 1234 => 4/1234.db -;; #f => 0/main.db -;; (abandoned the idea of num/db) -;; +(define (dbfile:make-tmpdir-name areapath) + (let* ((dname (conc "/tmp/"(current-user-name)"/" (string-translate areapath "/" ".")))) + (create-directory dname #t) + dname)) + (define (dbfile:run-id->path apath run-id) (conc apath"/"(dbfile:run-id->dbname run-id))) (define (db:dbname->path apath dbname) (conc apath"/"dbname)) -;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number -(define (dbfile:run-id->dbname run-id) +(define (dbfile:run-id->dbnum run-id) (cond - ((number? run-id) (conc ".megatest/" (modulo run-id 100) ".db")) - ((not run-id) (conc ".megatest/main.db")) - (else run-id))) + ((number? run-id) + (modulo run-id (num-run-dbs))) + ((not run-id) "main") ;; 0 or main? No, not 0. + (else + (assert #f "FATAL: run-id is required to be a number or #f")))) + +;; just the filename +(define (dbfile:run-id->dbfname run-id) + (conc (dbfile:run-id->dbnum run-id)".db")) + +;; the path in MTRAH with the filename +(define (dbfile:run-id->dbname run-id) + (conc ".mtdb/"(dbfile:run-id->dbfname run-id))) + ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; @@ -209,14 +236,12 @@ (cond (*dbstruct-dbs* (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard (else - (let* ((dbstruct (make-dbr:dbstruct))) + (let* ((dbstruct (make-dbr:dbstruct areapath: areapath tmppath: tmppath))) (set! *dbstruct-dbs* dbstruct) - (dbr:dbstruct-areapath-set! dbstruct areapath) - (dbr:dbstruct-tmppath-set! dbstruct tmppath) dbstruct)))) (define (dbfile:get-subdb dbstruct run-id) (let* ((dbfname (dbfile:run-id->dbname run-id))) (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f))) @@ -224,16 +249,19 @@ (define (dbfile:set-subdb dbstruct run-id subdb) (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb)) ;; (define *dbfile:num-handles-in-use* 0) -;; Get/open a database +;; Get/open a database. +;; +;; NOTE: most usage should call dbfile:open-db to get a dbdat +;; ;; if run-id => get run specific db ;; if #f => get main db -;; if run-id is a string treat it as a filename -;; if db already open - return inmem -;; if db not open, open inmem, rundb and sync then return inmem +;; if run-id is a string treat it as a filename - DON'T use this - we'll get rid of it. +;; if db already open - return cachedb +;; if db not open, open cachedb, rundb and sync then return cachedb ;; inuse gets set automatically for rundb's ;; (define (dbfile:get-dbdat dbstruct run-id) (let* ((subdb (dbfile:get-subdb dbstruct run-id))) (if (stack-empty? (dbr:subdb-dbstack subdb)) @@ -241,12 +269,16 @@ (begin (stack-pop! (dbr:subdb-dbstack subdb)))))) ;; return a previously opened db handle to the stack of available handles (define (dbfile:add-dbdat dbstruct run-id dbdat) - (let* ((subdb (dbfile:get-subdb dbstruct run-id))) - (stack-push! (dbr:subdb-dbstack subdb) dbdat) + (let* ((subdb (dbfile:get-subdb dbstruct run-id)) + (dbstk (dbr:subdb-dbstack subdb)) + (count (stack-count dbstk))) + (if (> count 15) + (dbfile:print-err "WARNING: stack for "run-id".db is "count".")) + (stack-push! dbstk dbdat) dbdat)) ;; set up a subdb ;; (define (dbfile:init-subdb dbstruct run-id init-proc) @@ -326,21 +358,21 @@ (define (dbfile:print-err . params) (with-output-to-port (current-error-port) (lambda () (apply print params)))) - + (define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500)) - (let* ((busy-file (conc fname"-journal")) + (let* ((busy-file (conc fname "-journal")) (delay-time (* (- 51 tries-left) 1.1)) (write-access (file-write-access? fname)) (dir-access (file-write-access? (pathname-directory fname))) (retry (lambda () (thread-sleep! delay-time) (if (> tries-left 0) (dbfile:cautious-open-database fname init-proc - sync-mode: sync-mode journal-mode: journal-mode + sync-mode journal-mode (- tries-left 1)))))) (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) (if (and (file-write-access? fname) (file-exists? busy-file)) @@ -351,11 +383,11 @@ (thread-sleep! 1) (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: forcing journal rollup "busy-file) (dbfile:brute-force-salvage-db fname))) - (dbfile:cautious-open-database fname init-proc sync-mode: sync-mode journal-mode: journal-mode (- tries-left 1))) + (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1))) (let* ((result (condition-case (if dir-access (dbfile:with-simple-file-lock (conc fname ".lock") @@ -402,37 +434,81 @@ "cp "backupfname" "fname))) (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n" " "cmd) (system cmd))) +;; opens and returns handle and nothing else +;; +;; NOTE: this is already protected by mutex *no-sync-db-mutex* +;; +(define (dbfile:raw-open-no-sync-db dbpath) + (if (not (file-exists? dbpath)) + (create-directory dbpath #t)) + (debug:print-info 0 *default-log-port* "Opening "dbpath"/no-sync.db") + (let* ((dbname (conc dbpath "/no-sync.db")) + (db-exists (file-exists? dbname)) + (init-proc (lambda (db) + (sqlite3:with-transaction + db + (lambda () + ;; I have been having trouble with init of no-sync.db so + ;; doing the init in a transaction every time (no gating + ;; on file existance. + (for-each + (lambda (stmt) + (sqlite3:execute db stmt)) + (list + "CREATE TABLE IF NOT EXISTS no_sync_metadat + (var TEXT, + val TEXT, + CONSTRAINT no_sync_metadat_constraint UNIQUE (var));" + "CREATE TABLE IF NOT EXISTS no_sync_locks + (key TEXT, + val TEXT, + CONSTRAINT no_sync_metadat_constraint UNIQUE (key));")))))) + (on-tmp (equal? (car (string-split dbpath "/")) "tmp")) + (db (if on-tmp + (dbfile:cautious-open-database dbname init-proc 0 "WAL") + (dbfile:cautious-open-database dbname init-proc 0 #f) + ;; (sqlite3:open-database dbname) + ))) + (if on-tmp ;; done in cautious-open-database + (begin + (sqlite3:execute db "PRAGMA synchronous = 0;") + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)))) + db)) + +(define (dbfile:with-no-sync-db dbpath proc) + (mutex-lock! *no-sync-db-mutex*) + (let* ((already-open *no-sync-db*) + (db (or already-open (dbfile:raw-open-no-sync-db dbpath))) + (res (proc db))) + (if (not already-open) + (sqlite3:finalize! db)) + (mutex-unlock! *no-sync-db-mutex*) + res)) + +(define *no-sync-db-mutex* (make-mutex)) (define (dbfile:open-no-sync-db dbpath) - (if *no-sync-db* - *no-sync-db* - (begin - (if (not (file-exists? dbpath)) - (create-directory dbpath #t)) - (let* ((dbname (conc dbpath "/no-sync.db")) - (db-exists (file-exists? dbname)) - (init-proc (lambda (db) - (if (not db-exists) - (begin - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")) - ))) - (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname))) - ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database - (set! *no-sync-db* db) - db)))) + (mutex-lock! *no-sync-db-mutex*) + (let* ((res (if *no-sync-db* + *no-sync-db* + (let* ((db (dbfile:raw-open-no-sync-db dbpath))) + (set! *no-sync-db* db) + db)))) + (mutex-unlock! *no-sync-db-mutex*) + res)) (define (db:no-sync-set db var val) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) (define (db:no-sync-del! db var) (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var)) (define (db:no-sync-get/default db var default) + (assert (sqlite3:database? db) "FATAL: db:no-sync-get/default called with a bad db handle:" db) (let ((res default)) (sqlite3:for-each-row (lambda (val) (set! res val)) db @@ -445,10 +521,72 @@ (if newres newres res)) res))) +;; timestring+identifier+payload +;; locks are unique on identifier, payload is informational +(define (db:extract-time-identifier instr) + (let ((tokens (string-split instr "+"))) + (match tokens + ((t i)(cons (string->number t) i)) + ((t) (cons (string->number t) #f)) + (else + (assert #f "FATAL: db:extract-time-identifier handed bad data "instr))))) + +;; transaction protected lock aquisition +;; either: +;; fails returns (#f lock-creation-time identifier) +;; succeeds (returns (#t lock-creation-time identifier) +;; use (db:no-sync-del! db keyname) to release the lock +;; +(define (db:no-sync-get-lock-with-id db keyname identifier) + (sqlite3:with-transaction + db + (lambda () + (condition-case + (let* ((curr-val (db:no-sync-get/default db keyname #f))) + (if curr-val + (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier + ((timestamp . ident) + (cons (equal? ident identifier) timestamp)) + (else (cons #f 'malformed-lock))) ;; lock malformed + (let ((curr-sec (current-seconds)) + (lock-value (if identifier + (conc (current-seconds)"+"identifier) + (current-seconds)))) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value) + (cons #t curr-sec)))) + (exn (io-error) (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again.")) + (exn (corrupt) (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed.")) + (exn (busy) (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem.")) + (exn () ;; (status done) ;; I don't know how to detect status done but no data! + (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n" + ((condition-property-accessor 'exn 'message) exn)) + (cons #f #f)))))) + +(define (db:no-sync-check-lock db keyname identifier) + (let* ((curr-val (db:no-sync-get/default db keyname #f))) + (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier + ((timestamp . ident) + (cons (equal? ident identifier) ident)) + (else (cons #f 'no-lock))))) + +;; get the lock, wait 0.25 seconds and verify still have it. +;; this should not be necessary given the use of transaction in +;; db:no-sync-get-lock-with-id but it does seem to be needed +;; +(define (db:no-sync-lock-and-check db keyname identifier) + (let* ((result (db:no-sync-get-lock-with-id db keyname identifier)) + (gotlock (car result))) + (if gotlock + (begin + (thread-sleep! 0.25) + (db:no-sync-check-lock db keyname identifier)) + result))) + ;; transaction protected lock aquisition ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock @@ -513,11 +651,12 @@ (db:sync-touched dbstruct runid keys dbinit) (set! *db-sync-in-progress* #f) (delete-file* lock-file) #t) (begin - (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.") + (if (common:low-noise-print 120 (conc "no lock "from-db-file)) + (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.")) #f )))) ;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f ;; ;; @@ -556,11 +695,11 @@ (tmpdb (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f)) (start-t (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) ))) (mutex-unlock! *db-multi-sync-mutex*) - (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb)) + (db:sync-tables (db:sync-all-tables-list keys) update_info tmpdb mtdb)) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) (dbfile:add-dbdat dbstruct run-id tmpdb) @@ -619,12 +758,12 @@ '("type" #f) '("last_update" #f)))) ;; needs db to get keys, this is for syncing all tables ;; -(define (db:sync-main-list dbstruct keys) - (let ((keys keys)) ;; (db:get-keys dbstruct))) +(define (db:sync-main-list keys) + (let ((keys keys)) (list (list "keys" '("id" #f) '("fieldname" #f) '("fieldtype" #f)) @@ -658,14 +797,29 @@ '("reviewed" #f) '("iterated" #f) '("avg_runtime" #f) '("avg_disk" #f) '("tags" #f) - '("jobgroup" #f))))) + '("jobgroup" #f)) + + + (list "tasks_queue" + '("id" #f) + '("action" #f) + '("owner" #f) + '("state" #f) + '("target" #f) + '("name" #f) + '("testpatt" #f) + '("keylock" #f) + '("params" #f) + '("creation_time" #f) + '("execution_time" #f)) + ))) -(define (db:sync-all-tables-list dbstruct keys) - (append (db:sync-main-list dbstruct keys) +(define (db:sync-all-tables-list keys) + (append (db:sync-main-list keys) db:sync-tests-only)) ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; db's are dbdat's ;; @@ -785,22 +939,27 @@ fields) ;; read the source table ;; store a list of all rows in the table in fromdat, up to batch-len. ;; Then add fromdat to the fromdats list, clear fromdat and repeat. - (sqlite3:for-each-row - (lambda (a . b) - (set! fromdat (cons (apply vector a b) fromdat)) - (if (> (length fromdat) batch-len) - (begin - (set! fromdats (cons fromdat fromdats)) - (set! fromdat '()) - (set! totrecords (+ totrecords 1))) - ) - ) - (dbr:dbdat-dbh fromdb) - full-sel) + (sqlite3:with-transaction + (dbr:dbdat-dbh fromdb) + (lambda () + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))) + ) + ) + (dbr:dbdat-dbh fromdb) + full-sel) + ) + ) ;; Count less than batch-len as a record (if (> (length fromdat) 0) (set! totrecords (+ totrecords 1))) @@ -870,11 +1029,12 @@ ) ) tbls) (let* ((runtime (- (current-milliseconds) start-time)) (should-print (or ;; (debug:debug-mode 12) - (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. + (common:low-noise-print 120 "db sync") + (> runtime 500)))) ;; low and high sync times treated as separate. (for-each (lambda (dat) (let ((tblname (car dat)) (count (cdr dat))) (set! tot-count (+ tot-count count)) @@ -915,30 +1075,10 @@ FOR EACH ROW BEGIN UPDATE test_data SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ))) -;; -;; ADD run-id SUPPORT -;; -(define (db:create-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:create-triggers db)))) - -(define (db:create-triggers db) - (for-each (lambda (key) - (sqlite3:execute db (cadr key))) - db:trigger-list)) - -(define (db:drop-all-triggers dbstruct) - (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (db:drop-triggers db)))) - (define (db:is-trigger-dropped db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger"))) (res #f)) @@ -988,58 +1128,144 @@ ;; (mutex-unlock! *db-open-mutex*) dbdat)) (define dbfile:db-init-proc (make-parameter #f)) -;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") -;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no +;; in xmaxima this gives a curve close to what I want: +;; plot2d ((exp(x/1.2)-1)/300, [x, 0, 10])$ +;; plot2d ((exp(x/1.5)-1)/40, [x, 0, 10])$ +;; plot2d ((exp(x/5)-1)/40, [x, 0, 20])$ +(define (dbfile:droop x) + (/ (- (exp (/ x 5)) 1) 40)) + ;; (* numqrys (/ 1 (qif-slope)))) + +;; create a dropping near the db file in a qif dir +;; use count of such files to gate queries (queries in flight) ;; -(define (db:with-db dbstruct run-id r/w proc . params) - (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption - (have-struct (dbr:dbstruct? dbstruct)) - (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly - (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) - #f)) - (db (if have-struct ;; this stuff just allows us to call with a db handle directly - (dbr:dbdat-dbh dbdat) - dbstruct)) - (fname (if dbdat - (dbr:dbdat-dbfile dbdat) - "nofilenameavailable")) - (jfile (conc fname"-journal")) - #;(subdb (if have-struct - (dbfile:get-subdb dbstruct run-id) - #f)) - ) ;; was 25 - (if (file-exists? jfile) - (begin - (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") - (thread-sleep! 0.2))) - (if (and use-mutex - (common:low-noise-print 120 "over-50-parallel-api-requests")) - (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " - (current-process-id))) ;; ", throttling access")) - (condition-case - (begin - (if use-mutex (mutex-lock! *db-with-db-mutex*)) - (let ((res (apply proc dbdat db params))) ;; the actual call is here. - (if use-mutex (mutex-unlock! *db-with-db-mutex*)) - ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat - (dbfile:add-dbdat dbstruct run-id dbdat)) - res)) - (exn (io-error) - (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) - (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) - (db:generic-error-printout exn "ERROR: database " fname - " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) - (exn () - (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " - ((condition-property-accessor 'exn 'message) exn)))))) +(define (dbfile:wait-for-qif fname run-id params) + (let* ((thedir (pathname-directory fname)) + (dbnum (dbfile:run-id->dbnum run-id)) + (destdir (conc thedir"/qif-"dbnum)) + (uniqn (get-area-path-signature (conc dbnum params))) + (crumbn (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id)))) + (if (not (file-exists? destdir))(create-directory (conc destdir"/attic") #t)) + (let loop ((count 0)) + (let* ((currlks (glob (conc destdir"/*"))) + (numqrys (length currlks)) + (delayval (cond ;; do a droopish curve + ((> numqrys 25) + (for-each + (lambda (f) + (if (> (- (current-seconds) + (handle-exceptions + exn + (current-seconds) ;; file is likely gone, just fake out + (file-modification-time f))) + (keep-age-param)) + (let* ((basedir (pathname-directory f)) + (filen (pathname-file f)) + (destf (conc basedir"/attic/"filen))) + (dbfile:print-err "Moving qif file "f" older than 10 seconds to "destf) + ;; (delete-file* f) + (handle-exceptions + exn + #t + (file-move f destf #t))))) + currlks) + 4) + ((> numqrys 0) (dbfile:droop numqrys)) ;; slope of 1/100 + (else #f)))) + (if (and delayval + (< count 5)) + (begin + (thread-sleep! delayval) + (loop (+ count 1)))))) + (with-output-to-file crumbn + (lambda () + (print fname" run-id="run-id" params="params) + )) + crumbn)) + +;; ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") +;; ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no +;; ;; +;; ;; Used only with http - to be removed +;; ;; +;; (define (dbfile:with-db dbstruct run-id r/w proc params) +;; (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) +;; (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) +;; ;; Testing 2023, March 14th. I went from full time use of the mutext to no use at all and +;; ;; didn't see much change in the frequency of the messages: +;; ;; Warning (#): in thread: (bind!) bad parameter or other API misuse +;; ;; allowing request count to go up to 1000 and other crashes showed up: +;; ;; Warning (#): in thread: (deserialize) unexpected end of input: # +;; ;; +;; ;; leave it fully on for now, test later if there is a performance issue +;; ;; +;; (let* ((use-mutex #t) ;;(> *api-process-request-count* 50)) ;; risk of db corruption +;; (have-struct (dbr:dbstruct? dbstruct)) +;; (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly +;; (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) +;; #f)) +;; (db (if have-struct ;; this stuff just allows us to call with a db handle directly +;; (dbr:dbdat-dbh dbdat) +;; dbstruct)) +;; (fname (if dbdat +;; (dbr:dbdat-dbfile dbdat) +;; "nofilenameavailable")) +;; (jfile (conc fname"-journal")) +;; (qryproc (lambda () +;; (if use-mutex (mutex-lock! *db-with-db-mutex*)) +;; (let ((res (apply proc dbdat db params))) ;; the actual call is here. +;; (if use-mutex (mutex-unlock! *db-with-db-mutex*)) +;; ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) +;; (if dbdat +;; (dbfile:add-dbdat dbstruct run-id dbdat)) +;; ;; (delete-file* crumbfile) +;; res))) +;; (stop-train (conc (dbr:dbstruct-areapath dbstruct)"/stop-the-train"))) +;; +;; (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db +;; ", fname="fname) +;; (if (file-exists? jfile) +;; (begin +;; (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") +;; (thread-sleep! 0.2))) +;; (if (and use-mutex +;; (common:low-noise-print 120 "over-50-parallel-api-requests")) +;; (dbfile:print-err *api-process-request-count* +;; " parallel api requests being processed in process " +;; (current-process-id))) ;; ", throttling access")) +;; (case (no-condition-db-with-db) +;; ((production)(qryproc)) +;; ((suicide-mode) +;; (handle-exceptions +;; exn +;; (with-output-to-file stop-train +;; (lambda () +;; (db:generic-error-printout exn "Stop train mode, run-id: "run-id +;; " params: "params" proc: "proc))) +;; (qryproc))) +;; (else +;; (condition-case +;; (qryproc) +;; (exn (io-error) +;; (db:generic-error-printout exn "ERROR: i/o error with "fname +;; ". Check permissions, disk space etc. and try again.")) +;; (exn (corrupt) +;; (db:generic-error-printout exn "ERROR: database "fname +;; " is corrupt. Repair it to proceed.")) +;; (exn (busy) +;; (db:generic-error-printout exn "ERROR: database "fname +;; " is locked. Try copying to another location," +;; " remove original and copy back.")) +;; (exn (permission)(db:generic-error-printout exn "ERROR: database "fname +;; " has some permissions problem.")) +;; (exn () +;; (db:generic-error-printout exn "ERROR: Unknown error with database "fname +;; " message: " +;; ((condition-property-accessor 'exn 'message) exn)))))))) ;;====================================================================== ;; another attempt at a transactionized queue ;;====================================================================== @@ -1112,34 +1338,45 @@ (define (dbfile:simple-file-lock fname #!key (expire-time 300)) (let ((fmod-time (handle-exceptions ext (current-seconds) (file-modification-time fname)))) + + ;; if the file exists, if it has expired, delete it and call this function recursively. (if (file-exists? fname) (if (> (- (current-seconds) fmod-time) expire-time) (begin + (dbfile:print-err "simple-file-lock: removing expired file: " fname) (handle-exceptions exn #f (delete-file* fname)) (dbfile:simple-file-lock fname expire-time: expire-time)) - #f) - (let ((key-string (conc (get-host-name) "-" (current-process-id))) + #f + ) + + ;; If it doesn't exist, write the host name and process id to the file + (let ((key-string (conc (get-host-name) "-" (current-process-id) ": " (argv))) (oup (open-output-file fname))) (with-output-to-port oup (lambda () (print key-string))) (close-output-port oup) - #;(with-output-to-file fname ;; bizarre. with-output-to-file does not seem to be cleaning up after itself. - (lambda () - (print key-string))) + + + ;; sleep 3 seconds and make sure it still exists and contains the same host/process id. + ;; if not, return #f (thread-sleep! 0.25) (if (file-exists? fname) (handle-exceptions exn #f (with-input-from-file fname (lambda () (equal? key-string (read-line))))) - #f) + (begin + (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 3 seconds later") + #f + ) + ) ) ) ) ) @@ -1159,13 +1396,44 @@ exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) (define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300)) - (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))) + (let ((start-time (current-seconds)) + (gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)) + (end-time (current-seconds)) + ) (if gotlock (let ((res (proc))) (dbfile:simple-file-release-lock fname) res) - (assert #t "FATAL: simple file lock never got a lock.")))) - + (begin + (dbfile:print-err "dbfile:with-simple-file-lock: " fname " is locked by " + (with-input-from-file fname + (lambda () + (dbfile:print-err (read-line))))) + (dbfile:print-err "wait time = " (- end-time start-time)) + (dbfile:print-err "ERROR: simple file lock could not get a lock for " fname " in " expire-time " seconds") + #f + ) + ) + ) +) + + +(define *get-cache-stmth-mutex* (make-mutex)) + +(define (db:get-cache-stmth dbdat db stmt) + (mutex-lock! *get-cache-stmth-mutex*) + (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id)) + (stmt-cache (dbr:dbdat-stmt-cache dbdat)) + ;; (stmth (db:hoh-get stmt-cache db stmt)) + (stmth (hash-table-ref/default stmt-cache stmt #f)) + (result (or stmth + (let* ((newstmth (sqlite3:prepare db stmt))) + ;; (db:hoh-set! stmt-cache db stmt newstmth) + (hash-table-set! stmt-cache stmt newstmth) + newstmth)))) + (mutex-unlock! *get-cache-stmth-mutex*) + result)) + ) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -1,5 +1,6 @@ + ;;====================================================================== ;; Copyright 2017, Matthew Welland. ;; ;; This file is part of Megatest. ;; @@ -17,40 +18,820 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit dbmod)) +(declare (uses dbfile)) +(declare (uses commonmod)) +(declare (uses debugprint)) (module dbmod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) - posix typed-records srfi-18 - srfi-69) - -(define (db:run-id->dbname run-id) - (cond - ((number? run-id)(conc run-id ".db")) - ((not run-id) "main.db") - (else run-id))) - - -;;====================================================================== -;; hash of hashs -;;====================================================================== - - -(define (db:hoh-set! dat key1 key2 val) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (if subhash - (hash-table-set! subhash key2 val) - (begin - (hash-table-set! dat key1 (make-hash-table)) - (db:hoh-set! dat key1 key2 val))))) - -(define (db:hoh-get dat key1 key2) - (let* ((subhash (hash-table-ref/default dat key1 #f))) - (and subhash - (hash-table-ref/default subhash key2 #f)))) - +(import scheme + chicken + data-structures + extras + files + + (prefix sqlite3 sqlite3:) + matchable + posix + typed-records + srfi-1 + srfi-18 + srfi-69 + + commonmod + dbfile + debugprint + ) + +;; NOTE: This returns only the name "1.db", "main.db", not the path +;; +(define (dbmod:run-id->dbfname run-id) + (conc (dbfile:run-id->dbnum run-id)".db")) + +(define (dbmod:get-dbdir dbstruct) + (let* ((areapath (dbr:dbstruct-areapath dbstruct)) + (dbdir (conc areapath"/.mtdb"))) + (if (and (file-write-access? areapath) + (not (file-exists? dbdir))) + (create-directory dbdir)) + dbdir)) + +(define (dbmod:run-id->full-dbfname dbstruct run-id) + (conc (dbmod:get-dbdir dbstruct + + run-id + + )"/"(dbmod:run-id->dbfname run-id))) + +;;====================================================================== +;; Read-only cachedb cached direct from disk method +;;====================================================================== + +(define *dbmod:nfs-db-handles* (make-hash-table)) ;; dbfname -> dbstruct + +;; called in rmt.scm nfs-transport-handler +(define (dbmod:nfs-get-dbstruct run-id keys init-proc areapath) + (assert areapath "FATAL: dbmod:nfs-get-dbstruct called without areapath set.") + (let* ((dbfname (dbmod:run-id->dbfname run-id)) + (dbstruct (hash-table-ref/default *dbmod:nfs-db-handles* dbfname #f))) + (if dbstruct + dbstruct + (let* ((newdbstruct (dbmod:open-dbmoddb areapath run-id dbfname init-proc keys syncdir: 'fromdisk))) + (hash-table-set! *dbmod:nfs-db-handles* dbfname newdbstruct) + newdbstruct)))) + +;;====================================================================== +;; The cachedb one-db file per server method goes in here +;;====================================================================== + +;; NOTE: the r/w is now w/r, #t=db modified by query, #f=db NOT modified by query +(define (dbmod:with-db dbstruct run-id w/r proc params) + (let* ((use-mutex (or (and w/r ;; use the mutex on queries that modify the db and for sync to disk + (> *api-process-request-count* 5)) ;; when writes are happening throttle more + (> *api-process-request-count* 50))) + (dbdat (dbmod:open-db dbstruct run-id (dbfile:db-init-proc))) + (dbh (dbr:dbdat-dbh dbdat)) ;; this will be the cachedb handle + (dbfile (dbr:dbdat-dbfile dbdat))) + ;; if nfs mode do a sync if delta > 2 + (let* ((last-update (dbr:dbstruct-last-update dbstruct)) + (sync-proc (dbr:dbstruct-sync-proc dbstruct)) + (curr-secs (current-seconds))) + (if (and (not (eq? (dbfile:cache-method) 'none)) ;; used by dashboard, no need for sync + (> (- curr-secs last-update) 5)) + (begin + (sync-proc last-update) + + ;; MOVE THIS CALL TO INSIDE THE sync-proc CALL + (dbr:dbstruct-last-update-set! dbstruct curr-secs) + ))) + (assert (sqlite3:database? dbh) "FATAL: bad db handle in dbmod:with-db") + (if use-mutex (mutex-lock! *db-with-db-mutex*)) + (let* ((res (let loop ((count 3)) + (condition-case + (apply proc dbdat dbh params) + (exn (busy) + (if (> count 0) + (begin + (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, will try "count" more times.") + (thread-sleep! 1) + (loop (- count 1))) + (begin + (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, giving up.") + (exit 1)))) + (exn () + (dbfile:print-err exn "ERROR: Unknown error with database for run-id "run-id", message: " + ((condition-property-accessor 'exn 'message) exn)) + (exit 2)))))) + (if use-mutex (mutex-unlock! *db-with-db-mutex*)) + res))) + +(define (db:with-db dbstruct run-id w/r proc . params) + (dbmod:with-db dbstruct run-id w/r proc params)) + +;; +(define (dbmod:open-cachedb-db init-proc dbfullname) + (let* ((db (if dbfullname + (dbmod:safely-open-db dbfullname init-proc #t) + (sqlite3:open-database ":memory:"))) + (handler (sqlite3:make-busy-timeout 136000))) + (sqlite3:set-busy-handler! db handler) + (init-proc db) + db)) + +(define (dbmod:open-db dbstruct run-id dbinit) + (or (dbr:dbstruct-dbdat dbstruct) + (let* ((dbdat (make-dbr:dbdat + dbfile: (dbr:dbstruct-dbfile dbstruct) + dbh: (dbr:dbstruct-cachedb dbstruct) + ))) + (dbr:dbstruct-dbdat-set! dbstruct dbdat) + dbdat))) + +;; NOT USED? +(define (dbmod:need-on-disk-db-handle) + (case (dbfile:cache-method) + ((none tmp) #t) + ((cachedb) + (case (dbfile:sync-method) + ((original) #t) + ((attach) #t) ;; we need it to force creation of the on-disk file - FIXME + (else + (debug:print 0 *default-log-port* "Unknown dbfile:sync-method setting: " + (dbfile:sync-method))))) + (else + (debug:print 0 *default-log-port* "Unknown dbfile:cache-method setting: " + (dbfile:cache-method)) + #f))) + +(define (dbmod:safely-open-db dbfullname init-proc write-access) + (dbfile:with-simple-file-lock + (conc dbfullname".lock") + (lambda () + (let* ((dbexists (file-exists? dbfullname)) + (db (sqlite3:open-database dbfullname)) + (handler (sqlite3:make-busy-timeout 136000))) + (sqlite3:set-busy-handler! db handler) + (if (and dbexists + write-access) + (init-proc db)) + db)))) + +(define *sync-in-progress* #f) + +;; Open the cachedb db and the on-disk db +;; populate the cachedb db with data +;; +;; Updates fields in dbstruct +;; Returns dbstruct +;; +;; * This routine creates the db if not found +;; * Probably can get rid of the dbstruct-in +;; +(define (dbmod:open-dbmoddb areapath run-id dbfname-in init-proc keys + #!key (dbstruct-in #f) + ;; (dbcontext 'megatest) ;; use dashboard to do the dashboard + (syncdir 'todisk)) ;; todisk is used when caching in /tmp and writing data back to MTRAH + (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath))) + (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) + (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept + (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id)) + (dbexists (file-exists? dbfullname)) + (tmpdir (dbfile:make-tmpdir-name areapath)) + (tmpdb (let* ((fname (conc tmpdir"/"dbfname))) + fname)) + (cachedb (dbmod:open-cachedb-db init-proc + ;; (if (eq? (dbfile:cache-method) 'cachedb) + ;; #f + tmpdb + ;; ) + )) + (write-access (file-write-access? dbpath)) + (db (dbmod:safely-open-db dbfullname init-proc write-access)) + (tables (db:sync-all-tables-list keys))) + (if (not (and (sqlite3:database? cachedb) + (sqlite3:database? db))) + (begin + (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.") + (exit))) ;; (assert (sqlite3:database? cachedb) "FATAL: open-dbmoddb: cachedb is not a db") + ;; (assert (sqlite3:database? db) "FATAL: open-dbmoddb: db is not a db") + (dbr:dbstruct-cachedb-set! dbstruct cachedb) + (dbr:dbstruct-ondiskdb-set! dbstruct db) + (dbr:dbstruct-dbfile-set! dbstruct dbfullname) + (dbr:dbstruct-dbtmpname-set! dbstruct tmpdb) + (dbr:dbstruct-dbfname-set! dbstruct dbfname) + (dbr:dbstruct-sync-proc-set! dbstruct + (lambda (last-update) + (if *sync-in-progress* + (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk") + (let* ((syncer-logfile (conc areapath"/logs/"dbfname"-syncer.log")) + (sync-cmd (conc "NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "tmpdb" -to "dbfullname" -period 5 -timeout 10 &")) + (synclock-file (conc dbfullname".lock")) + (syncer-running-file (conc dbfullname"-sync-running")) + (synclock-mod-time (if (file-exists? synclock-file) + (handle-exceptions + exn + #f + (file-modification-time synclock-file)) + #f)) + (thethread (lambda () + (thread-start! + (make-thread + (lambda () + (set! *sync-in-progress* #t) + (debug:print-info "Running "sync-cmd) + (if (file-exists? syncer-running-file) + (debug:print-info 0 *default-log-port* "Syncer still running, skipping syncer start.") + (system sync-cmd)) + (set! *sync-in-progress* #f))))))) + (if (< (file-modification-time tmpdb) + (file-modification-time dbfullname)) + (debug:print 0 *default-log-port* "Skipping sync, "tmpdb" older than "dbfullname) + (if synclock-mod-time + (if (< (- (current-seconds) synclock-mod-time) 20) ;; something wrong with sync, remove file + (begin + (handle-exceptions + exn + #f + (delete-file synclock-file)) + (thethread)) + (debug:print 0 *default-log-port* "Skipping sync, lockfile "synclock-file" found.")) + (thethread))))))) + ;; (dbmod:sync-tables tables #f db cachedb) + ;; (if db + (dbmod:sync-gasket tables #f cachedb db dbfullname 'fromdest keys) ;; ) ;; load into cachedb + (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second? + dbstruct)) + +;; (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard +;; (dbmod:sync-tables tables last-update cachedb db) +;; (dbmod:sync-tables tables last-update db cachedb)))) +;; +;; direction: 'fromdest 'todest +;; +(define (dbmod:sync-gasket tables last-update cachedb dbh dbfname direction keys) + (assert (sqlite3:database? cachedb) "FATAL: sync-gasket: cachedb is not a db") + (assert (sqlite3:database? cachedb) "FATAL: sync-gasket: dbh is not a db") + (debug:print-info 0 *default-log-port* "dbmod:sync-gasket called with sync-method="(dbfile:sync-method)) + (case (dbfile:sync-method) + ((none) #f) + ((attach) + (dbmod:attach-sync tables cachedb dbfname direction)) + ((newsync) + (dbmod:new-sync tables cachedb dbh dbfname direction)) + (else + (case direction + ((todisk) ;; i.e. from the cache db to the mtrah db + (dbmod:sync-tables tables last-update keys cachedb dbh)) + (else + (dbmod:sync-tables tables last-update keys dbh cachedb)))))) + +(define (dbmod:close-db dbstruct) + ;; do final sync to disk file + ;; (do-sync ...) + (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct))) + +;;====================================================================== +;; Sync db +;;====================================================================== + +(define (dbmod:calc-use-last-update has-last-update fields last-update) + (cond + ((and has-last-update + (member "last_update" fields)) + #t) ;; if given a number, just use it for all fields + ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table + ((and (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields))) + #t) + ((and last-update (not (pair? last-update)) (not (number? last-update))) + (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields + #f) + (else + #f))) + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; dbs are sqlite3 db handles +;; +;; if last-update specified ("field-name" . time-in-seconds) +;; then sync only records where field-name >= time-in-seconds +;; IFF field-name exists +;; +;; Use (db:sync-all-tables-list keys) to get the tbls input +;; +(define (dbmod:sync-tables tbls last-update keys fromdb todb) + (debug:print-info 2 *default-log-port* "dbmod:sync-tables called, from: "fromdb", to: "todb) + (assert (sqlite3:database? fromdb) "FATAL: dbmod:sync-tables called with fromdb not a database" fromdb) + (assert (sqlite3:database? todb) "FATAL: dbmod:sync-tables called with fromdb not a database" todb) + (let ((stmts (make-hash-table)) ;; table-field => stmt + (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) + (numrecs (make-hash-table)) + (start-time (current-milliseconds)) + (tot-count 0)) + (for-each ;; table + (lambda (tabledat) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (has-last-update (member "last_update" fields)) + (use-last-update (dbmod:calc-use-last-update has-last-update fields last-update)) + (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for + (if (number? last-update) + last-update + (cdr last-update)) + #f)) + (last-update-field (if use-last-update + (if (number? last-update) + "last_update" + (car last-update)) + #f)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) ;; BBHERE + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " WHERE " last-update-field " >= " last-update-value) + "") + ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) + (todat (make-hash-table)) + (count 0) + (field-names (map car fields))) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + ;; store a list of all rows in the table in fromdat, up to batch-len. + ;; Then add fromdat to the fromdats list, clear fromdat and repeat. + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))))) + fromdb + full-sel) + + ;; Count less than batch-len as a record + (if (> (length fromdat) 0) + (set! totrecords (+ totrecords 1))) + + ;; tack on remaining records in fromdat + (if (not (null? fromdat)) + (set! fromdats (cons fromdat fromdats))) + + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! todat a (apply vector a b))) + todb + full-sel) + + ;; first pass implementation, just insert all changed rows + + (let* ((db todb) + (drp-trigger (if (member "last_update" field-names) + (db:drop-trigger db tablename) + #f)) + (has-last-update (member "last_update" field-names)) + (is-trigger-dropped (if has-last-update + (db:is-trigger-dropped db tablename) + #f)) + (stmth (sqlite3:prepare db full-ins)) + (changed-rows 0)) + (for-each + (lambda (fromdat-lst) + (mutex-lock! *db-transaction-mutex*) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))) + (set! changed-rows (+ changed-rows 1)))))) + fromdat-lst))) + (mutex-unlock! *db-transaction-mutex*)) + fromdats) + ;; (debug:print 0 *default-log-port* "row="row) + + (sqlite3:finalize! stmth) + (if (member "last_update" field-names) + (db:create-trigger db tablename))) + )) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (or ;; (debug:debug-mode 12) + (common:low-noise-print 120 "db sync") + (> runtime 500)))) ;; low and high sync times treated as separate. + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! tot-count (+ tot-count count)))) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count)) + +(define (has-last-update dbh tablename) + (let* ((has-last #f)) + (sqlite3:for-each-row + (lambda (name) + (if (equal? name "last_update") + (set! has-last #t))) + dbh + (conc "SELECT name FROM pragma_table_info('"tablename"') as tblInfo;")) + has-last)) + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; +;; direction = fromdest, todest +;; mode = 'full, 'incr +;; +;; Idea: youngest in dest is last_update time +;; +(define (dbmod:attach-sync tables dbh destdbfile direction #!key + (mode 'full) + (no-update '("keys")) ;; do + ) + (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile) + (if (not (sqlite3:auto-committing? dbh)) + (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.") + (let* ((table-names (map car tables)) + (dest-exists (file-exists? destdbfile))) + (assert dest-exists "FATAL: sync called with non-existant file, "destdbfile) + ;; attach the destdbfile + ;; for each table + ;; insert into dest. select * from src.
where last_update>last_update + ;; done + (debug:print 0 *default-log-port* "Attaching "destdbfile" as auxdb") + (sqlite3:execute dbh (conc "ATTACH '"destdbfile"' AS auxdb;")) + (for-each + (lambda (table) + (let* ((tbldat (alist-ref table tables equal?)) + (fields (map car tbldat)) + (no-id-fields (filter (lambda (x)(not (equal? x "id"))) fields)) + (fields-str (string-intersperse fields ",")) + (no-id-fields-str (string-intersperse no-id-fields ",")) + (dir (eq? direction 'todest)) + (fromdb (if dir "" "auxdb.")) + (todb (if dir "auxdb." "")) + (set-str (string-intersperse + (map (lambda (field) + (conc fromdb field"="todb field)) + fields) + ",")) + (stmt1 (conc "INSERT OR IGNORE INTO "todb table + " SELECT * FROM "fromdb table";")) + (stmt8 (conc "UPDATE "todb table" SET ("no-id-fields-str") = (SELECT "no-id-fields-str" FROM "fromdb table" WHERE "todb table".id="fromdb table".id" + (if (member "last_update" fields) + (conc " AND "fromdb table".last_update > "todb table".last_update);") + ");"))) + (start-ms (current-milliseconds))) + ;; (debug:print 0 *default-log-port* "stmt8="stmt8) + ;; (if (sqlite3:auto-committing? dbh) + ;; (begin + (mutex-lock! *db-transaction-mutex*) + (sqlite3:with-transaction + dbh + (lambda () + (debug:print-info 0 *default-log-port* "Sync from "fromdb table" to "todb table" using "stmt1) + (sqlite3:execute dbh stmt1) ;; get all new rows + + #;(if (member "last_update" fields) + (sqlite3:execute dbh stmt8)) ;; get all updated rows + ;; (sqlite3:execute dbh stmt5) + ;; (sqlite3:execute dbh stmt4) ;; if it worked this would be better for incremental up + ;; (sqlite3:execute dbh stmt6) + )) + (debug:print 0 *default-log-port* "Synced table "table + " in "(- (current-milliseconds) start-ms)"ms") ;; ) + (mutex-unlock! *db-transaction-mutex*))) + + ;; (debug:print 0 *default-log-port* "Skipping sync of table "table" due to transaction in flight.")))) + table-names) + (sqlite3:execute dbh "DETACH auxdb;")))) + +;; FAILED ATTEMPTS + + ;; (if (not (has-last-update dbh table)) + ;; (sqlite3:execute dbh (conc "ALTER TABLE "table" ADD COLUMN last_update INTEGER;"))) + ;; (if (not (has-last-update dbh (conc "auxdb."table))) + ;; (sqlite3:execute dbh (conc "ALTER TABLE auxdb."table" ADD COLUMN last_update INTEGER;"))) + + ;; (stmt2 (conc "INSERT OR REPLACE INTO "todb table + ;; " SELECT * FROM "fromdb table" WHERE " + ;; fromdb table".last_update > " + ;; todb table".last_update;")) + ;; (stmt3 (conc "INSERT OR REPLACE INTO "todb"."table + ;; " SELECT * FROM "fromdb table";")) + ;; (stmt4 (conc "DELETE FROM "todb table" WHERE "fromdb + ;; table ".last_update > "todb table".last_update;")) + ;; (stmt5 (conc "DELETE FROM "todb table";")) + ;; (stmt6 (conc "INSERT OR REPLACE INTO "todb table" ("fields-str") SELECT "fields-str" FROM "fromdb table";")) + ;; (stmt7 (conc "UPDATE "todb table" SET "set-str (if (member "last_update" fields) + ;; (conc " WHERE "fromdb table".last_update > "todb table".last_update;") + ;; ";"))) + +;; prefix is "" or "auxdb." +;; +;; (define (dbmod:last-update-patch dbh prefix) +;; (let (( + +;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) +;; +;; direction = fromdest, todest +;; mode = 'full, 'incr +;; +;; Idea: youngest in dest is last_update time +;; +(define (dbmod:new-sync tables dbh1 dbh2 destdbfile direction #!key + (mode 'full)) + (debug:print 0 *default-log-port* "Doing new-sync "direction" "destdbfile) + (if (not (sqlite3:auto-committing? dbh1)) + (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.") + (let* ((table-names (map car tables)) + (dest-exists (file-exists? destdbfile))) + (assert dest-exists "FATAL: sync called with non-existant file, "destdbfile) + (for-each + (lambda (table) + (let* ((tbldat (alist-ref table tables equal?)) + (fields (map car tbldat)) + (no-id-fields (filter (lambda (x)(not (equal? x "id"))) fields)) + (questionmarks (string-intersperse (make-list (length no-id-fields) "?") ",")) + (fields-str (string-intersperse fields ",")) + (no-id-fields-str (string-intersperse no-id-fields ",")) + (dir (eq? direction 'todest)) + (fromdb (if dir dbh1 dbh2)) + (todb (if dir dbh2 dbh1)) + (set-str (string-intersperse + (map (lambda (field) + (conc fromdb field"="todb field)) + fields) + ",")) + ;; (stmt1 (conc "INSERT OR IGNORE INTO "todb table + ;; " SELECT * FROM "fromdb table";")) + ;; (stmt8 (conc "UPDATE "todb table" SET ("no-id-fields-str") = (SELECT "no-id-fields-str" FROM "fromdb table " WHERE "todb table".id="fromdb table".id" + ;; (if (member "last_update" fields) + ;; (conc " AND "fromdb table".last_update > "todb table".last_update);") + ;; ");"))) + (stmt1 (conc "SELECT MAX(last_update) FROM "table";")) ;; use the highest last_update as your time reference + (stmt2 (conc "SELECT no-id-fields-str FROM "table" WHERE last_update>?;")) + (stmt3 (conc "UPDATE "table" SET ("no-id-fields-str") = ("questionmarks") WHERE id=?;")) + (start-ms (current-milliseconds))) + (debug:print 0 *default-log-port* "stmt3="stmt3) + (if (sqlite3:auto-committing? dbh1) + (begin + (sqlite3:with-transaction + dbh1 + (lambda () + (sqlite3:execute dbh1 stmt1) ;; get all new rows + + #;(if (member "last_update" fields) + (sqlite3:execute dbh1 stmt8)) ;; get all updated rows + ;; (sqlite3:execute dbh stmt5) + ;; (sqlite3:execute dbh stmt4) ;; if it worked this would be better for incremental up + ;; (sqlite3:execute dbh stmt6) + )) + (debug:print 0 *default-log-port* "Synced table "table + " in "(- (current-milliseconds) start-ms)"ms")) + (debug:print 0 *default-log-port* "Skipping sync of table "table" due to transaction in flight.")))) + table-names) + (sqlite3:execute dbh1 "DETACH auxdb;")))) + + + + +;;====================================================================== +;; Moved from dbfile +;;====================================================================== + +;; wait up to aprox n seconds for a journal to go away +;; +(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) + (if (not (string? path)) + (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)") + (let ((fullpath (conc path "-journal"))) + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)) + (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") + #t) ;; if stuff goes wrong just allow it to move on + (let loop ((journal-exists (file-exists? fullpath)) + (count n)) ;; wait ten times ... + (if journal-exists + (begin + (if (and waiting-msg + (eq? (modulo n 30) 0)) + (debug:print 0 *default-log-port* waiting-msg)) + (if (> count 0) + (begin + (thread-sleep! 1) + (loop (file-exists? fullpath) + (- count 1))) + (begin + (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") + (if remove (system (conc "rm -rf " fullpath))) + #f))) + #t)))))) + + +;;====================================================================== +;; M E T A G E T A N D S E T V A R S +;;====================================================================== + +;; returns number if string->number is successful, string otherwise +;; also updates *global-delta* +;; +(define (db:get-var dbstruct var) + (let* ((res #f)) + (db:with-db + dbstruct #f #f ;; for the moment vars are only stored in main.db + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + "SELECT val FROM metadat WHERE var=?;" var) + ;; convert to number if can + (if (string? res) + (let ((valnum (string->number res))) + (if valnum (set! res valnum)))) + res)))) + +(define (db:inc-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var)))) + +(define (db:dec-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var)))) + +;; This was part of db:get-var. It was used to estimate the load on +;; the database files. +;; +;; scale by 10, average with current value. +;; (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms) +;; (if throttle throttle 0.01))) +;; 2)) +;; (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit +;; (begin +;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) +;; (set! *last-global-delta-printed* *global-delta*))) + +(define (db:set-var dbstruct var val) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") + var val)))) + +(define (db:add-var dbstruct var val) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+? WHERE var=?;") val var)))) + +(define (db:del-var dbstruct var) + (db:with-db dbstruct #f #t + (lambda (dbdat db) + (sqlite3:execute (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var)))) + +(define (db:get-toplevels-and-incompletes dbstruct run-id running-deadtime remotehoststart-deadtime) + (let* ((toplevels '()) + (oldlaunched '()) + (incompleted '())) + (db:with-db + dbstruct run-id #t ;; not a write but problemtic + (lambda (dbdat db) + (let* ((stmth1 (db:get-cache-stmth + dbdat db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) + AND state IN ('RUNNING');")) + (stmth2 (db:get-cache-stmth + dbdat db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) + AND state IN ('REMOTEHOSTSTART');")) + (stmth3 (db:get-cache-stmth + dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests + WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 + AND state IN ('LAUNCHED');"))) + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) + (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" + test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds) + " event-time="event-time" run-duration="run-duration)))) + stmth1 + run-id running-deadtime) ;; default time 720 seconds + + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id + " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time + " run-duration="run-duration) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) + stmth2 + run-id remotehoststart-deadtime) ;; default time 230 seconds + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id + " 1 day since event_time marked") + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) + stmth3 + run-id)))) + (list incompleted oldlaunched toplevels))) + +;;====================================================================== +;; db to db sync +;;====================================================================== + +(define (dbmod:db-to-db-sync src-db dest-db last-update init-proc keys) + (if (and (file-exists? src-db) ;; can't proceed without a source + (file-read-access? src-db)) + (let* ((have-dest (file-exists? dest-db)) + (dest-file-wr (and have-dest + (file-write-access? dest-db))) ;; exists and writable + (dest-dir (or (pathname-directory dest-db) + ".")) + (dest-dir-wr (and (file-exists? dest-dir) + (file-write-access? dest-dir))) + (d-wr (or (and have-dest + dest-file-wr) + dest-dir-wr)) + (copied (if (and (not have-dest) + dest-dir-wr) + (begin + (file-copy src-db dest-db) + #t) + #f))) + (if copied + (begin + (debug:print-info 0 *default-log-port* "db-to-db-sync done with file-copy") + #t) + (let* ((tables (db:sync-all-tables-list keys)) + (sdb (dbmod:safely-open-db src-db init-proc #t)) + (ddb (dbmod:safely-open-db dest-db init-proc d-wr)) + (res (dbmod:sync-gasket tables last-update sdb ddb dest-db 'todisk keys))) + (sqlite3:finalize! sdb) + (sqlite3:finalize! ddb) + res))) + #f)) ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -16,22 +16,27 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== +(declare (unit dcommon)) + +(declare (uses gutils)) +(declare (uses db)) +(declare (uses commonmod)) +(declare (uses rmtmod)) + (use format) (require-library iup) (import (prefix iup iup:)) (use canvas-draw) (import canvas-draw-iup) (use regex typed-records matchable) -(declare (unit dcommon)) - -(declare (uses gutils)) -(declare (uses db)) -;; (declare (uses synchash)) +(import commonmod + rmtmod + debugprint) (include "megatest-version.scm") (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") @@ -635,11 +640,12 @@ (common:max (map cadr col-indices)))) (max-visible (max (- (dboard:tabdat-num-tests tabdat) 15) 3)) (max-col-vis (if (> max-col 10) 10 max-col)) (numrows 1) (numcols 1)) - (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS") + (if (common:low-noise-print 60 "runs-stats-update-clear") + (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS")) (iup:attribute-set! stats-matrix "NUMCOL" max-col ) (iup:attribute-set! stats-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20 (iup:attribute-set! stats-matrix "NUMCOL_VISIBLE" max-col-vis) (iup:attribute-set! stats-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row)) ;;(print "row-indices: " row-indices " col-indices: " col-indices) @@ -704,11 +710,13 @@ #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10))) + (let ((servers (case (rmt:transport-mode) + ((http)(server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath* limit: 10))) + (else '())))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) @@ -1418,36 +1426,10 @@ (begin (set! *last-monitor-update-time* run-update-time) ;; monitor-modtime) #t) #f))) -;; DOES NOT WORK RELIABLY WITH /tmp WAL mode files. Timestamps only change when the db -;; is closed (I think). If db dir starts with /tmp always return true -;; -(define (dashboard:database-changed? commondat tabdat #!key (context-key 'default)) - (let* ((run-update-time (current-seconds)) - (dbdir (dboard:tabdat-dbdir tabdat)) - (modtime (dashboard:get-youngest-run-db-mod-time dbdir)) - (recalc (dashboard:recalc modtime - (dboard:commondat-please-update commondat) - (dboard:get-last-db-update tabdat context-key)))) - ;; (dboard:tabdat-last-db-update tabdat)))) - (if recalc - (dboard:set-last-db-update! tabdat context-key run-update-time)) - (dboard:commondat-please-update-set! commondat #f) - recalc)) - -(define (dashboard:get-youngest-run-db-mod-time dbdir) - (handle-exceptions - exn - (begin - (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) - " db-dir="dbdir ", exn=" exn) - (current-seconds)) ;; something went wrong - just print an error and return current-seconds - (common:max (map (lambda (filen) - (file-modification-time filen)) - (glob (conc dbdir "/*.db*")))))) (define (dboard:get-last-db-update tabdat context) (hash-table-ref/default (dboard:tabdat-last-db-update tabdat) context 0)) (define (dboard:set-last-db-update! tabdat context newtime) Index: debugprint.scm ================================================================== --- debugprint.scm +++ debugprint.scm @@ -3,41 +3,49 @@ (declare (uses mtargs)) (module debugprint * -;;(import scheme chicken data-structures extras files ports) +(import scheme) +(cond-expand + (chicken-4 (import scheme chicken data-structures posix ports extras - - ;; scheme - ;; chicken.base - ;; chicken.string - ;; chicken.time - ;; chicken.time.posix - ;; chicken.port - ;; chicken.process-context - ;; chicken.process-context.posix - (prefix mtargs args:) srfi-1 ;; system-information - ) + )) + (chicken-5 + (import + scheme + chicken.base + chicken.string + chicken.time + chicken.time.posix + chicken.port + chicken.process-context + chicken.process-context.posix + + srfi-1 + (prefix mtargs args:)) + + (define setenv set-environment-variable!) + )) ;;====================================================================== ;; debug stuff ;;====================================================================== (define verbosity (make-parameter '())) (define *default-log-port* (current-error-port)) (define debug:print-logger (make-parameter #f)) ;; set to a proc to call on every logging print - + (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") (args:get-arg "-debug-noprop") (get-environment-variable "MT_DEBUG_MODE")))) (verbosity (debug:calc-verbosity debugstr 'q)) @@ -45,11 +53,11 @@ ;; if we were handed a bad verbosity rule then we will override it with 1 and continue (if (not (verbosity))(verbosity 1)) (if (and (not (args:get-arg "-debug-noprop")) (or (args:get-arg "-debug") (not (get-environment-variable "MT_DEBUG_MODE")))) - (setenv #;set-environment-variable! "MT_DEBUG_MODE" (if (list? (verbosity)) + (setenv "MT_DEBUG_MODE" (if (list? (verbosity)) (string-intersperse (map conc (verbosity)) ",") (conc (verbosity))))))) ;; check verbosity, #t is ok (define (debug:check-verbosity verbosity vstr) @@ -114,15 +122,15 @@ ((and (number? vb) (list? n)) (member vb n)) (else #f)))) -(define (debug:handle-remote-logging params) - (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now - ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " - (string-intersperse (map conc params) " ") "; " - (string-intersperse (command-line-arguments) " "))))) +;; (define (debug:handle-remote-logging params) +;; (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now +;; ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " +;; (string-intersperse (map conc params) " ") "; " +;; (string-intersperse (command-line-arguments) " "))))) (define debug:enable-timestamp (make-parameter #t)) (define (debug:timestamp) (if (debug:enable-timestamp) Index: diff-report.scm ================================================================== --- diff-report.scm +++ diff-report.scm @@ -16,11 +16,17 @@ ;; along with Megatest. If not, see . ;; (declare (unit diff-report)) (declare (uses common)) +(declare (uses debugprint)) (declare (uses rmt)) +(declare (uses rmtmod)) +(declare (uses commonmod)) +(import commonmod + rmtmod + debugprint) (include "common_records.scm") (use matchable) (use fmt) (use ducttape-lib) ADDED docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf Index: docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf ================================================================== --- /dev/null +++ docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf cannot compute difference between binary files ADDED docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf Index: docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf ================================================================== --- /dev/null +++ docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf cannot compute difference between binary files ADDED docs/reference/queues-dont-fix-overload.pdf Index: docs/reference/queues-dont-fix-overload.pdf ================================================================== --- /dev/null +++ docs/reference/queues-dont-fix-overload.pdf cannot compute difference between binary files Index: env.scm ================================================================== --- env.scm +++ env.scm @@ -17,10 +17,16 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit env)) + +(declare (uses debugprint)) +(declare (uses mtargs)) + +(import (prefix mtargs args:) + debugprint) (use sql-de-lite) ;; srfi-1 posix regex regex-case srfi-69 srfi-18 call-with-environment-variables) (define (env:open-db fname) (let* ((db-exists (common:file-exists? fname)) Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -17,20 +17,27 @@ ;; along with Megatest. If not, see . ;; ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(use srfi-1 posix regex srfi-69 directory-utils call-with-environment-variables posix-extras - z3 csv typed-records pathname-expand matchable) - (declare (unit ezsteps)) (declare (uses db)) (declare (uses common)) +(declare (uses debugprint)) (declare (uses items)) (declare (uses runconfig)) -;; (declare (uses sdb)) -;; (declare (uses filedb)) +(declare (uses commonmod)) +(declare (uses rmtmod)) +(declare (uses mtargs)) + +(use srfi-1 posix regex srfi-69 directory-utils call-with-environment-variables posix-extras + z3 csv typed-records pathname-expand matchable) + +(import commonmod + debugprint + rmtmod + (prefix mtargs args:)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") @@ -38,11 +45,11 @@ ;;(rmt:get-test-info-by-id run-id test-id) -> testdat ;; TODO: deprecate me in favor of ezsteps.scm ;; -(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat) +(define (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat) (let* ((stepname (car ezstep)) ;; do stuff to run the step (stepinfo (cadr ezstep)) ;; (let ((info (cadr ezstep))) ;; (if (proc? info) "" info))) ;; (stepproc (let ((info (cadr ezstep))) @@ -63,11 +70,12 @@ (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ (logpro-file (conc stepname ".logpro")) (html-file (conc stepname ".html")) (dat-file (conc stepname ".dat")) (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) - (logpro-used (common:file-exists? logpro-file))) + (logpro-used (common:file-exists? logpro-file)) + (mtexepath (common:get-megatest-exe-path))) (setenv "MT_STEP_NAME" stepname) (hash-table-set! all-steps-dat stepname `((params . ,paramparts))) (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) @@ -96,11 +104,11 @@ (debug:print 4 *default-log-port* "script: " script) (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) ;; now launch the actual process (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (list (cons "PATH" mtexepath)) (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 (pid #f)) (let ((proc (lambda () (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) Index: genexample.scm ================================================================== --- genexample.scm +++ genexample.scm @@ -17,11 +17,18 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit genexample)) +(declare (uses mtargs)) +(declare (uses debugprint)) +(declare (uses rmtmod)) + (use posix regex matchable) +(import (prefix mtargs args:) + rmtmod + debugprint) (include "db_records.scm") (define genexample:example-logpro #<. - -(require-extension (srfi 18) extras tcp s11n) - - -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) - -(use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) - -;; Configurations for server -(tcp-buffer-size 2048) -(max-connections 2048) - -(declare (unit http-transport)) - -(declare (uses common)) -(declare (uses db)) -(declare (uses tests)) -(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. -(declare (uses server)) -;; (declare (uses daemon)) -(declare (uses portlogger)) -(declare (uses rmt)) -(declare (uses dbfile)) -(declare (uses commonmod)) - -(include "common_records.scm") -(include "db_records.scm") -(include "js-path.scm") - -(import dbfile commonmod) - -(require-library stml) -(define (http-transport:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; S E R V E R -;; ====================================================================== - -;; Call this to start the actual server -;; - -(define *db:process-queue-mutex* (make-mutex)) - -(define (http-transport:run hostn) - ;; Configurations for server - (tcp-buffer-size 2048) - (max-connections 2048) - (debug:print 2 *default-log-port* "Attempting to start the server ...") - (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (start-port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.server-start"))) - (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) - ;; set some parameters for the server - (root-path (if link-tree-path - link-tree-path - (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! - (handle-directory spiffy-directory-listing) - (handle-exception (lambda (exn chain) - (signal (make-composite-condition - (make-property-condition - 'server - 'message "server error"))))) - - ;; http-transport:handle-directory) ;; simple-directory-handler) - ;; Setup the web server and a /ctrl interface - ;; - (vhost-map `(((* any) . ,(lambda (continue) - ;; open the db on the first call - ;; This is were we set up the database connections - (let* (($ (request-vars source: 'both)) - (dat ($ 'dat)) - (res #f)) - (cond - ((equal? (uri-path (request-uri (current-request))) - '(/ "api")) - (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc - headers: '((content-type text/plain))) - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*)) - ((equal? (uri-path (request-uri (current-request))) - '(/ "")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "json_api")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "runs")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ any)) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "hey")) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "jquery3.1.0.js")) - (send-response body: (http-transport:show-jquery) - headers: '((content-type application/javascript)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "test_log")) - (send-response body: (http-transport:html-test-log $) - headers: '((content-type text/HTML)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "dashboard")) - (send-response body: (http-transport:html-dboard $) - headers: '((content-type text/HTML)))) - (else (continue)))))))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) - (with-output-to-file start-file (lambda ()(print (current-process-id))))) - (http-transport:try-start-server ipaddrstr start-port))) - -;; This is recursively run by http-transport:run until sucessful -;; -(define (http-transport:try-start-server ipaddrstr portnum) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) - (if (not config-use-proxy) - (determine-proxy (constantly #f))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 0.1) - - ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (if config-hostname ;; this is a hint to bind directly - (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-") - ;; ipaddrstr - ;; config-hostname)) - (start-server port: portnum)) - (portlogger:open-run-close portlogger:set-port portnum "released") - (debug:print 1 *default-log-port* "INFO: server has been stopped")))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;;====================================================================== -;; C L I E N T S -;;====================================================================== - -(define *http-mutex* (make-mutex)) - -;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here -;; I'm pretty sure it is defunct. - -;; This next block all imported en-mass from the api branch -(define *http-requests-in-progress* 0) -(define *http-connections-next-cleanup* (current-seconds)) - -(define (http-transport:get-time-to-cleanup) - (let ((res #f)) - (mutex-lock! *http-mutex*) - (set! res (> (current-seconds) *http-connections-next-cleanup*)) - (mutex-unlock! *http-mutex*) - res)) - -(define (http-transport:inc-requests-count) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) - ;; Use this opportunity to slow things down iff there are too many requests in flight - (if (> *http-requests-in-progress* 5) - (begin - (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") - (thread-sleep! 1))) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count proc) - (mutex-lock! *http-mutex*) - (proc) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count-and-close-all-connections) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds - (if (> *http-requests-in-progress* 0) - (if (> etime (current-seconds)) - (begin - (thread-sleep! 0.05) - (loop etime)) - (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) - (close-all-connections!))) - (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:inc-requests-and-prep-to-close-all-connections) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) - -;; Send "cmd" with json payload "params" to serverdat and receive result -;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) - (let* ((fullurl (if (vector? serverdat) - (http-transport:server-dat-get-api-req serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1)))) - (res (vector #f "uninitialized")) - (success #t) - (sparams (db:obj->string params transport: 'http)) - (runremote (or area-dat *runremote*)) - (server-id (if (vector? serverdat) - (http-transport:server-dat-get-server-id serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1))))) - (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) - - ;; set up the http-client here - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - ;; send the data and get the response - ;; extract the needed info from the http data and - ;; process and return it. - (let* ((send-recieve (lambda () - (mutex-lock! *http-mutex*) - ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) - ;; ((exn http client-error) e (print e))) - (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. - success - (db:string->obj - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (set! success #f) - (if (debug:debug-mode 1) - (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") - (begin - (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) - (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) - (debug:print 0 *default-log-port* " call-chain: " call-chain))) - ;; what if another thread is communicating ok? Can't happen due to mutex - (set! *runremote* #f) - (set! runremote #f) - ;; (if runremote - ;; (remote-conndat-set! runremote #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? - (mutex-unlock! *http-mutex*) - ;; (signal (make-composite-condition - ;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;; "communications failed" - (close-all-connections!) - (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key (or server-id "thekey")) - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) - transport: 'http) - 0)) ;; added this speculatively - ;; Shouldn't this be a call to the managed call-all-connections stuff above? - ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections? - (mutex-unlock! *http-mutex*) - )) - (time-out (lambda () - (thread-sleep! 45) - (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") - #f)) - (th1 (make-thread send-recieve "with-input-from-request")) - (th2 (make-thread time-out "time out"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - (vector-set! res 0 success) - (thread-terminate! th2) - (if (vector? res) - (if (vector-ref res 0) ;; this is the first flag or the second flag? - (let* ((res-dat (vector-ref res 1))) - (if (and (string? res-dat) (string-contains res-dat "server-id mismatch")) - (signal (make-composite-condition - (make-property-condition - 'servermismatch - 'message (vector-ref res 1)))) - res)) ;; this is the *inner* vector? seriously? why? - (if (debug:debug-mode 11) - (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it - (print-call-chain (current-error-port)) - (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 11 *default-log-port* " server call chain:") - (pp (vector-ref res 1) (current-error-port)) - (signal (vector-ref res 0))) - res)) - (signal (make-composite-condition - (make-property-condition - 'timeout - 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) - -;; careful closing of connections stored in *runremote* -;; -(define (http-transport:close-connections #!key (area-dat #f)) - (let* ((runremote (or area-dat *runremote*)) - (server-dat (if runremote - (remote-conndat runremote) - #f))) ;; (hash-table-ref/default *runremote* run-id #f))) - (if (vector? server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (close-connection! api-dat) - (close-idle-connections!) - #t)) - #f))) - - -(define (make-http-transport:server-dat)(make-vector 6)) -(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) -(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) -(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) -(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) -(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) -(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) -;(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) -(define (http-transport:server-dat-get-server-id vec) (vector-ref vec 6)) - -(define (http-transport:server-dat-make-url vec) - (if (and (http-transport:server-dat-get-iface vec) - (http-transport:server-dat-get-port vec)) - (conc "http://" - (http-transport:server-dat-get-iface vec) - ":" - (http-transport:server-dat-get-port vec)) - #f)) - -(define (http-transport:server-dat-update-last-access vec) - (if (vector? vec) - (vector-set! vec 5 (current-seconds)) - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) - -;; -;; connect -;; -(define (http-transport:client-connect iface port server-id) - (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) - server-dat)) - - - - -;; run http-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (http-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((servinfofile #f) - (sdat #f) - (no-sync-db (db:open-no-sync-db)) - (tmp-area (common:get-db-tmp-area)) - (started-file (conc tmp-area "/.server-started")) - (server-start-time (current-seconds)) - (server-info (let loop ((start-time (current-seconds)) - (changed #t) - (last-sdat "not this")) - (begin ;; let ((sdat #f)) - (thread-sleep! 0.01) - (debug:print-info 0 *default-log-port* "Waiting for server alive signature") - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if (and sdat - (not changed) - (> (- (current-seconds) start-time) 2)) - (let* ((servinfodir (conc *toppath*"/.servinfo")) - (ipaddr (car sdat)) - (port (cadr sdat)) - (servinf (conc servinfodir"/"ipaddr":"port))) - (set! servinfofile servinf) - (if (not (file-exists? servinfodir)) - (create-directory servinfodir #t)) - (with-output-to-file servinf - (lambda () - (let* ((serv-id (server:mk-signature))) - (set! *server-id* serv-id) - (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)) - (print "started: "(seconds->year-week/day-time (current-seconds)))))) - (set! *on-exit-procs* (cons - (lambda () - (delete-file* servinf)) - *on-exit-procs*)) - ;; put data about this server into a simple flat file host.port - (debug:print-info 0 *default-log-port* "Received server alive signature") - #;(common:save-pkt `((action . alive) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat))) - *configdat* #t) - sdat) - (begin - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (let* ((ipaddr (car sdat)) - (port (cadr sdat)) - (servinf (conc *toppath*"/.servinfo/"ipaddr":"port))) - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - ;; (delete-file* servinf) ;; handled by on-exit, can be removed - #;(common:save-pkt `((action . died) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat)) - (msg . "Transport died?")) - *configdat* #t) - (exit)) - (loop start-time - (equal? sdat last-sdat) - sdat))))))) - (iface (car server-info)) - (port (cadr server-info)) - (last-access 0) - (server-timeout (server:expiration-timeout)) - (server-going #f) - (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - - (handle-exceptions - exn - (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) - (with-output-to-file started-file (lambda ()(print (current-process-id))))) - - (let loop ((count 0) - (server-state 'available) - (bad-sync-count 0) - (start-time (current-milliseconds))) - - ;; Use this opportunity to sync the tmp db to megatest.db - (if (not server-going) ;; *dbstruct-dbs* - (begin - (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!! - (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. - (if (and no-sync-db - (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :) - (begin - (if (common:low-noise-print 120 "sync-all-print") - (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))) - (db:all-db-sync *dbstruct-dbs*)))) - - ;; when things go wrong we don't want to be doing the various queries too often - ;; so we strive to run this stuff only every four seconds or so. - (let* ((sync-time (- (current-milliseconds) start-time)) - (rem-time (quotient (- 4000 sync-time) 1000))) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time))) - - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) - - ;; Check that iface and port have not changed (can happen if server port collides) - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - - (if (not (equal? sdat (list iface port))) - (let ((new-iface (car sdat)) - (new-port (cadr sdat))) - (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") - (set! iface new-iface) - (set! port new-port) - (if (not *server-id*) - (set! *server-id* (server:mk-signature))) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) - (flush-output *default-log-port*))) - - ;; Transfer *db-last-access* to last-access to use in checking that we are still alive - (mutex-lock! *heartbeat-mutex*) - (set! last-access *db-last-access*) - (mutex-unlock! *heartbeat-mutex*) - - (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) - (begin - (if (not *server-id*) - (set! *server-id* (server:mk-signature))) - (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) - (flush-output *default-log-port*))) - (if (common:low-noise-print 60 "dbstats") - (begin - (debug:print 0 *default-log-port* "Server stats:") - (db:print-current-query-stats))) - (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) - (cond - ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (let ((curr-time (current-seconds))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn) - (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter - (not *server-overloaded*) - (file-exists? servinfofile)) - (change-file-times servinfofile curr-time curr-time))) - (if (or (common:low-noise-print 120 "start new server") - (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another - (begin - (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...") - (server:kind-run *toppath*) - (if (> *api-process-request-count* 100) - (begin - (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile) - (delete-file* servinfofile))))))) - (loop 0 server-state bad-sync-count (current-milliseconds))) - (else - (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (http-transport:server-shutdown port))))))) - -(define (http-transport:server-shutdown port) - (begin - ;;(BB> "http-transport:server-shutdown called") - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - ;; - ;; start_shutdown - ;; - (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up - (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 1) - - ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - ;; (debug:print-info 0 *default-log-port* "Average cached write time " - ;; (if (eq? *number-of-writes* 0) - ;; "n/a (no writes)" - ;; (/ *writes-total-delay* - ;; *number-of-writes*)) - ;; " ms") - ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - ;; (debug:print-info 0 *default-log-port* "Average non-cached time " - ;; (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - ;; " ms") - - (db:print-current-query-stats) - #;(common:save-pkt `((action . exit) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - - ;; remove .servinfo file(s) here - - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (exit))) - -;; all routes though here end in exit ... -;; -;; start_server? -;; -(define (http-transport:launch) - ;; check that a server start is in progress, pause or exit if so - (let* ((tmp-area (common:get-db-tmp-area)) - (server-start (conc tmp-area "/.server-start")) - (server-started (conc tmp-area "/.server-started")) - (start-time (common:lazy-modification-time server-start)) - (started-time (common:lazy-modification-time server-started)) - (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting - (start-time-old (> (- (current-seconds) start-time) 5)) - (cleanup-proc (lambda (msg) - (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) - (full-serv-fname (conc *toppath* "/logs/" serv-fname)) - (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) - (debug:print 0 *default-log-port* msg) - (if (common:file-exists? full-serv-fname) - (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) - (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) - (exit))))) - #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago - (not server-starting)) - (begin - (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") - (exit))) - ;; lets not even bother to start if there are already three or more server files ready to go - #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) - (if (> num-alive 3) - (begin - (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) - (exit)))) - #;(common:save-pkt `((action . start) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))) - -;; (define (http-transport:server-signal-handler signum) -;; (signal-mask! signum) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; (thread-sleep! 1)) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) - -;;=============================================== -;; Java script -;;=============================================== -(define (http-transport:show-jquery) - (let* ((data (tests:readlines *java-script-lib*))) -(string-join data "\n"))) - - - -;;====================================================================== -;; web pages -;;====================================================================== - -(define (http-transport:html-test-log $) - (let* ((run-id ($ 'runid)) - (test-item ($ 'testname)) - (parts (string-split test-item ":")) - (test-name (car parts)) - - (item-name (if (equal? (length parts) 1) - "" - (cadr parts)))) - ;(print $) -(tests:get-test-log run-id test-name item-name))) - - -(define (http-transport:html-dboard $) - (let* ((page ($ 'page)) - (oup (open-output-string)) - (bdy "--------------------------") - - (ret (tests:dynamic-dboard page))) - (s:output-new oup ret) - (close-output-port oup) - - (set! bdy (get-output-string oup)) - (conc "

Dashboard

" bdy "

" ))) - -(define (http-transport:main-page) - (let ((linkpath (root-path))) - (conc "

" (pathname-strip-directory *toppath*) "

" - "" - "Run area: " *toppath* - "

Server Stats

" - (http-transport:stats-table) - "
" - (http-transport:runs linkpath) - "
" - ;; (http-transport:run-stats) - "" - ))) - -(define (http-transport:stats-table) - (mutex-lock! *heartbeat-mutex*) - (let ((res - (conc "
" - ;; "" - "" - "" - "" - ;; "" - "" - "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) - "n/a (no writes)" - (/ *writes-total-delay* - *number-of-writes*)) - " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - " ms
Last access" (seconds->time-string *db-last-access*) "
"))) - (mutex-unlock! *heartbeat-mutex*) - res)) - -(define (http-transport:runs linkpath) - (conc "

Runs

" - (string-intersperse - (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) - (map (lambda (p) - (conc "" p "
")) - files)) - " "))) - -#;(define (http-transport:run-stats) - (let ((stats (open-run-close db:get-running-stats #f))) - (conc "" - (string-intersperse - (map (lambda (stat) - (conc "")) - stats) - " ") - "
" (car stat) "" (cadr stat) "
"))) DELETED index-tree.scm Index: index-tree.scm ================================================================== --- index-tree.scm +++ /dev/null @@ -1,59 +0,0 @@ -;;====================================================================== -;; Copyright 2006-2013, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . -;; -;;====================================================================== - -;;====================================================================== -;; Tests -;;====================================================================== - -(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils) -(import (prefix sqlite3 sqlite3:)) - -(declare (unit tests)) -(declare (uses lock-queue)) -(declare (uses db)) -(declare (uses common)) -(declare (uses items)) -(declare (uses runconfig)) - -(include "common_records.scm") -(include "key_records.scm") -(include "db_records.scm") -(include "run_records.scm") -(include "test_records.scm") - -;; Populate the links tree with index.html files -;; -;; - start from most recent tests and work towards oldest -OR- -;; start from deepest hierarchy and work way up -;; - look up tests in megatest.db -;; - cross-reference the tests to stats.db -;; - if newer than event_time in stats.db or not registered in stats.db regenerate -;; - run du and store in stats.db -;; - when all tests at that level done generate next level up index.html -;; -;; include in rollup html index.html: -;; sum of du -;; counts of PASS, FAIL, RUNNING, REMOTEHOSTSTART, LAUNCHED, CHECK etc. -;; overall status -;; -;; include in test specific index.html: -;; host, uname, cpu graph, disk avail graph, steps, data -;; meta data, state, status, du -;; Index: items.scm ================================================================== --- items.scm +++ items.scm @@ -21,10 +21,14 @@ ;; (temperature "cool medium hot") ;; (season "summer winter fall spring"))) (declare (unit items)) (declare (uses common)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(import commonmod + debugprint) (include "common_records.scm") ;; Puts out all combinations (define (process-itemlist hierdepth curritemkey itemlist) Index: keys.scm ================================================================== --- keys.scm +++ keys.scm @@ -19,15 +19,22 @@ ;;====================================================================== ;; Run keys, these are used to hierarchially organise tests and run areas ;;====================================================================== -(use sqlite3 srfi-1 posix regex regex-case srfi-69) -(import (prefix sqlite3 sqlite3:)) - (declare (unit keys)) (declare (uses common)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(declare (uses mtargs)) + +(use sqlite3 srfi-1 posix regex regex-case srfi-69) +(import (prefix sqlite3 sqlite3:) + (prefix mtargs args:)) + +(import commonmod + debugprint) (include "key_records.scm") (include "common_records.scm") (define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -18,29 +18,43 @@ ;;====================================================================== ;; launch a task - this runs on the originating host, tests themselves ;; ;;====================================================================== + +(declare (unit launch)) +(declare (uses subrun)) +(declare (uses common)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(declare (uses configf)) +(declare (uses db)) +(declare (uses rmtmod)) +(declare (uses ezsteps)) +;; (declare (uses dbmod)) +(declare (uses dbfile)) +(declare (uses mtargs)) (use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables csv) (use typed-records pathname-expand matchable) -(import (prefix base64 base64:)) -(import (prefix sqlite3 sqlite3:)) - -(declare (unit launch)) -(declare (uses subrun)) -(declare (uses common)) -(declare (uses configf)) -(declare (uses db)) -(declare (uses ezsteps)) +(import (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + (prefix mtargs args:) +) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "megatest-fossil-hash.scm") + +(import commonmod + rmtmod + debugprint + ;; dbmod + dbfile) ;;====================================================================== ;; ezsteps ;;====================================================================== @@ -183,11 +197,11 @@ (tal (cdr ezstepslst)) (prevstep #f)) (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") ;; check exit-info (vector-ref exit-info 1) (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) - (let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)) + (let* ((logpro-used (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)) (stepname (car ezstep)) (stepparms (hash-table-ref all-steps-dat stepname))) (setenv "MT_STEP_NAME" stepname) (pp (hash-table->alist all-steps-dat)) ;; if logpro-used read in the stepname.dat file @@ -205,11 +219,11 @@ ) )))))) (define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) - (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) + (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "60"))) (start-seconds (current-seconds)) (calc-minutes (lambda () (inexact->exact (round (- @@ -239,13 +253,13 @@ (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg df #f))) (do-sync (or new-cpu-load new-disk-free over-time)) - (test-info (rmt:get-test-info-by-id run-id test-id)) - (state (db:test-get-state test-info)) - (status (db:test-get-status test-info)) + (test-info (rmt:get-test-state-status-by-id run-id test-id)) + (state (car test-info));; (db:test-get-state test-info)) + (status (cdr test-info));; (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond ((test-get-kill-request run-id test-id) @@ -259,11 +273,12 @@ (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING (set! kill-job? #f))) (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) - (launch:handle-zombie-tests run-id) + (if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty + (launch:handle-zombie-tests run-id)) (when do-sync ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) @@ -314,11 +329,11 @@ (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt ))) (mutex-unlock! m) ;; no point in sticking around. Exit now. But run end of run before exiting? - (launch:end-of-run-check run-id) + (launch:end-of-run-check run-id) (exit))) (if (hash-table-ref/default misc-flags 'keep-going #f) (begin (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta @@ -726,11 +741,13 @@ ) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) + (tests:summarize-items run-id test-id test-name #f)) + ;; BUG was this meant to be the antecnt of the if above? + ;; BUG was this meant to be the antecnt of the if above? (tests:summarize-test run-id test-id) ;; don't force - just update if no ;; Leave a .final-status file for the top level test (tests:save-final-status run-id test-id) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let* @@ -765,20 +782,28 @@ ;; new ;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup ;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na ;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED ;; 0 RUNNING ==> this is actually the first condition, should not get here - +(define *last-rollup* 0) (define (launch:end-of-run-check run-id ) (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) - (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) + (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) - (current-state (rmt:get-run-state run-id)) - (current-status (rmt:get-run-status run-id))) - ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing - (debug:print 0 *default-log-port* "Running test cnt :" running-cnt) - (rmt:set-state-status-and-roll-up-run run-id current-state current-status) + (current-state-status (rmt:get-run-state-status run-id)) + (current-state (car current-state-status)) ;; (rmt:get-run-state run-id)) + (current-status (cdr current-state-status))) ;; (rmt:get-run-status run-id))) + ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing + (debug:print 0 *default-log-port* "Running test cnt :" running-cnt) + ;; + ;; TODO: add a final rollup when run is done (if there isn't one already) + ;; + (if (or (< running-cnt 3) ;; have only few running + (> (- (current-seconds) *last-rollup*) 10)) ;; or haven't rolled up in past ten seconds + (begin + (rmt:set-state-status-and-roll-up-run run-id current-state current-status) + (set! *last-rollup* (current-seconds)))) (runs:update-junit-test-reporter-xml run-id) (cond ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) (if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id))) (begin @@ -806,42 +831,10 @@ (item-path (vector-ref running-test 11))) (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") (if (not (null? tal)) (loop (car tal) (cdr tal))))))))))) -;; replaced below with version that does not ssh if checking on localhost -#;(define (launch:is-test-alive host pid) - (if (and host pid (not (equal? host "n/a"))) - (let* ((cmd (conc "ssh " host " pstree -A " pid)) - (output (with-input-from-pipe cmd read-lines))) - (debug:print 2 *default-log-port* "Running " cmd " received " output) - (if (eq? (length output) 0) - #f - #t)) - #t)) - -;; this is a close duplicate of: -;; process:alist-on-host? -;; process:alive -;; -(define (launch:is-test-alive host pid) - (let* ((same-host (equal? host (get-host-name))) - (cmd (conc - (if same-host "" (conc "ssh "host" ")) - "pstree -A "pid))) - (if (and host pid - (not (equal? host "n/a"))) - - (let* ((output (if same-host - (with-input-from-pipe cmd read-lines) - (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines))) - (debug:print 2 *default-log-port* "Running " cmd " received " output) - (if (eq? (length output) 0) - #f - #t)) - #t))) ;; assuming bad query is about a live test is likely not the right thing to do? - (define (launch:kill-tests-if-dead run-id) (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) (let loop ((running-test (car running-tests)) (tal (cdr running-tests)) (kill-cnt 0)) @@ -852,11 +845,11 @@ (pid (rmt:test-get-top-process-pid run-id test-id)) (event-time (vector-ref running-test 5)) (duration (vector-ref running-test 12)) (flag 0) (curr-time (current-seconds))) - (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed + (if (and (< (+ event-time duration 600) curr-time) (not (commonmod:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed (begin (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") (set! flag 1) (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) (if (not (null? tal)) @@ -1131,11 +1124,14 @@ (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) (begin (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") (set! *toppath* #f) ;; force it to be false so we return #f #f)) - + + ;; needed by various transport and db modules + (dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*)) + ;; one more attempt to cache the configs for future reading (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) (mtcachef (car cachefiles)) (rccachef (cdr cachefiles))) @@ -1160,10 +1156,29 @@ ;; if have -append-config then read and append here (let ((cfname (args:get-arg "-append-config"))) (if (and cfname (file-read-access? cfname)) (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. + + ;; have config at this time, this is a good place to set params based on config file settings + (let* ((dbmode (configf:lookup *configdat* "setup" "dbcache-mode")) + (syncmode (configf:lookup *configdat* "setup" "sync-mode")) + (srvdebug (configf:lookup *configdat* "server" "debug-parameter"))) + (if dbmode + (begin + (debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode) + (dbcache-mode (string->symbol dbmode)))) + (if syncmode + (begin + (debug:print-info 0 *default-log-port* "Overriding syncmode to "syncmode) + (dbfile:sync-method (string->symbol syncmode)))) + (if srvdebug + (begin + (debug:print-info 0 *default-log-port* "Overriding server debug parameter to "srvdebug) + (tt-server-profile-string srvdebug))) + ) + *toppath*))) (define (get-best-disk confdat testconfig) (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) @@ -1366,13 +1381,15 @@ (not (directory-exists? toptest-path))) (begin (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) (handle-exceptions exn - (begin - (debug:print 0 *default-log-port* "failed to create directory " toptest-path ", exn=" exn) - #f) + (if (directory-exists? toptest-path) ;; it was likely created in parallel + #t + (begin + (debug:print-info 0 *default-log-port* "failed to create directory " toptest-path ", exn=" exn) + #f)) (create-directory toptest-path #t)) (hash-table-set! *toptest-paths* testname toptest-path))))) ;; The toptest path has been created, the link to the test in the linktree has ;; been created. Now, if this is an iterated test the real test dir must be created @@ -1380,14 +1397,16 @@ (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) (debug:print 2 *default-log-port* "Setting up sub test run area") (debug:print 2 *default-log-port* " - creating run area in " test-path) (handle-exceptions exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) - ", exiting, exn=" exn) - (exit 1)) + (if (directory-exists? test-path) + #t + (begin + (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) + ", exiting, exn=" exn) + (exit 1))) (create-directory test-path #t)) (debug:print 2 *default-log-port* " - creating link from: " test-path "\n" " to: " lnktarget) @@ -1442,10 +1461,11 @@ ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) + (assert runname "FATAL: launch-test called with no runname") (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex (let* ( ;; (lock-key (conc "test-" test-id)) ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds ;; (if (car lock) @@ -1548,11 +1568,11 @@ (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) ;; (pp (hash-table->alist tconfig)) (set! diskpath (get-best-disk *configdat* tconfig)) (debug:print 2 *default-log-port* "best disk path = " diskpath) (if diskpath - (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) (debug:print-info 2 *default-log-port* "Using work area " work-area)) (begin (set! work-area (conc test-path "/tmp_run")) @@ -1567,11 +1587,11 @@ ;; (list 'serverinf *server-info*) #;(list 'homehost (let* ((hhdat (server:get-homehost))) (if hhdat (car hhdat) #f))) - (list 'serverurl (if *runremote* + #;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED (remote-server-url *runremote*) #f)) ;; (list 'areaname (common:get-testsuite-name)) (list 'toppath *toppath*) (list 'work-area work-area) DELETED lock-queue.scm Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ /dev/null @@ -1,253 +0,0 @@ -;; Copyright 2006-2013, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . -;; - -(use (prefix sqlite3 sqlite3:) srfi-18) - -(declare (unit lock-queue)) -(declare (uses common)) -(declare (uses tasks)) - -;;====================================================================== -;; attempt to prevent overlapping updates of rollup files by queueing -;; update requests in an sqlite db -;;====================================================================== - -;;====================================================================== -;; db record, -;;====================================================================== - -(define (make-lock-queue:db-dat)(make-vector 3)) -(define-inline (lock-queue:db-dat-get-db vec) (vector-ref vec 0)) -(define-inline (lock-queue:db-dat-get-path vec) (vector-ref vec 1)) -(define-inline (lock-queue:db-dat-set-db! vec val)(vector-set! vec 0 val)) -(define-inline (lock-queue:db-dat-set-path! vec val)(vector-set! vec 1 val)) - -(define (lock-queue:delete-lock-db dbdat) - (let ((fname (lock-queue:db-dat-get-path dbdat))) - (system (conc "rm -f " fname "*")))) - -(define (lock-queue:open-db fname #!key (count 10)) - (let* ((actualfname (conc fname ".lockdb")) - (dbexists (common:file-exists? actualfname)) - (db (sqlite3:open-database actualfname)) - (handler (make-busy-timeout 136000))) - (if dbexists - (vector db actualfname) - (begin - (handle-exceptions - exn - (begin - (thread-sleep! 10) - (if (> count 0) - (lock-queue:open-db fname count: (- count 1)) - (vector db actualfname))) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS queue ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - start_time INTEGER, - state TEXT, - CONSTRAINT queue_constraint UNIQUE (test_id));") - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS runlocks ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - run_lock TEXT, - CONSTRAINT runlock_constraint UNIQUE (run_lock));")))))) - (sqlite3:set-busy-handler! db handler) - (vector db actualfname))) - -(define (lock-queue:set-state dbdat test-id newstate #!key (remtries 10)) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 30) - (lock-queue:set-state dbdat test-id newstate remtries: (- remtries 1))) - (begin - (debug:print-error 0 *default-log-port* " Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") - #f)) - (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "UPDATE queue SET state=? WHERE test_id=?;" - newstate - test-id))) - -(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10)) - ;; no need to wait on journal on read only queries - ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:any-younger. Removing lockdb and trying again in 5 seconds.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 5) - (lock-queue:delete-lock-db dbdat) - (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1))) - (begin - (debug:print-error 0 *default-log-port* " Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") - #f)) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (tid) - ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as - (if (not (equal? tid test-id)) - (set! res tid))) - (lock-queue:db-dat-get-db dbdat) - "SELECT test_id FROM queue WHERE start_time > ?;" mystart) - res))) - -(define (lock-queue:get-lock dbdat test-id #!key (count 10)(waiting-msg #f)) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 remove: #t waiting-msg: "lock-queue:get-lock, waiting on journal") - (let* ((res #f) - (db (lock-queue:db-dat-get-db dbdat)) - (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) - (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) - (let ((result - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 10) - ;; (if (> count 0) - ;; #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries - ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained - (lock-queue:delete-lock-db dbdat) - #f) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tid lockstate) - (set! res (list tid lockstate))) - lckqry) - (if res - (if (equal? (car res) test-id) - #t ;; already have the lock - #f) - (begin - (sqlite3:execute mklckqry test-id) - ;; if no error handled then return #t for got the lock - #t))))))) - (sqlite3:finalize! lckqry) - (sqlite3:finalize! mklckqry) - result))) - -(define (lock-queue:release-lock fname test-id #!key (count 10)) - (let* ((dbdat (lock-queue:open-db fname))) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:release-lock; waiting on journal") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to release queue lock. Will try again in few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! (/ count 10)) - (if (> count 0) - (begin - (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)) - (lock-queue:release-lock fname test-id count: (- count 1))) - (let ((journal (conc fname "-journal"))) - ;; If we've tried ten times and failed there is a serious problem - ;; try to remove the lock db and allow it to be recreated - (handle-exceptions - exn - #f - (if (common:file-exists? journal)(delete-file journal)) - (if (common:file-exists? fname) (delete-file fname)) - #f)))) - (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id) - (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))))) - -(define (lock-queue:steal-lock dbdat test-id #!key (count 10)) - (debug:print-info 0 *default-log-port* "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat)) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to steal queue lock. Will try again in few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 10) - (if (> count 0) - (lock-queue:steal-lock dbdat test-id count: (- count 1)) - #f)) - (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE run_lock='locked';")) - (lock-queue:get-lock dbdat test-it)) - -;; returns #f if ok to skip the task -;; returns #t if ok to proceed with task -;; otherwise waits -;; -(define (lock-queue:wait-turn fname test-id #!key (count 10)(waiting-msg #f)) - (let* ((dbdat (lock-queue:open-db fname)) - (mystart (current-seconds)) - (db (lock-queue:db-dat-get-db dbdat))) - ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (thread-sleep! 10) - (if (> count 0) - (begin - (sqlite3:finalize! db) - (lock-queue:wait-turn fname test-id count: (- count 1))) - (begin - (debug:print 0 *default-log-port* "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain") - (print-call-chain (current-error-port)) - #f))) - ;; wait 10 seconds and then check to see if someone is already updating the html - (thread-sleep! 10) - (if (not (lock-queue:any-younger? dbdat mystart test-id)) ;; no processing in flight, must try to start processing - (begin - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") - (sqlite3:execute - db - "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" - test-id mystart) - ;; (thread-sleep! 1) ;; give other tests a chance to register - (let ((result - (let loop ((younger-waiting (lock-queue:any-younger? dbdat mystart test-id))) - (if younger-waiting - (begin - ;; no need for us to wait. mark in the lock queue db as skipping - ;; no point in marking anything in the queue - simply never register this - ;; test as it is *covered* by a previously started update to the html file - ;; (lock-queue:set-state dbdat test-id "skipping") - #f) ;; let the calling process know that nothing needs to be done - (if (lock-queue:get-lock dbdat test-id) - #t - (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock - (lock-queue:steal-lock dbdat test-id) - (begin - (thread-sleep! 1) - (loop (lock-queue:any-younger? dbdat mystart test-id))))))))) - (sqlite3:finalize! db) - result)))))) - - -;; (use trace) -;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state) Index: margs.scm ================================================================== --- margs.scm +++ margs.scm @@ -24,13 +24,19 @@ (define (args:get-arg arg . default) (if (null? default) (hash-table-ref/default args:arg-hash arg #f) (hash-table-ref/default args:arg-hash arg (car default)))) -(define (args:any? . args) - (not (null? (filter (lambda (x) x) - (map args:get-arg args))))) +;; get an arg as a number +(define (args:get-arg-number arg . default) + (let* ((val-str (args:get-arg arg)) + (val (if val-str (string->number val-str) #f))) + (if val + val + (if (null? default) + #f + default)))) (define (args:get-arg-from ht arg . default) (if (null? default) (hash-table-ref/default ht arg #f) (hash-table-ref/default ht arg (car default)))) @@ -46,18 +52,22 @@ (if (string? help) (print help) (print "Usage: " (car (argv)) " ... ")) (exit 0)) - ;; one-of args defined -(define (args:any-defined? . param) - (let ((res #f)) - (for-each - (lambda (arg) - (if (args:get-arg arg)(set! res #t))) - param) - res)) +(define (args:any-defined? . args) + (not (null? (filter (lambda (x) x) + (map args:get-arg args))))) + +;; ;; one-of args defined +;; (define (args:any-defined? . param) +;; (let ((res #f)) +;; (for-each +;; (lambda (arg) +;; (if (args:get-arg arg)(set! res #t))) +;; param) +;; res)) ;; args: (define (args:get-args args params switches arg-hash num-needed) (let* ((numargs (length args)) (adj-num-needed (if num-needed (+ num-needed 2) #f))) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.7009) +(define megatest-version 1.8013) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -22,15 +22,23 @@ ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) (declare (uses common)) ;; (declare (uses megatest-version)) -(declare (uses margs)) +;; (declare (uses margs)) +;; (declare (uses mtargs)) +;; (declare (uses mtargs.import)) +(declare (uses mtargs)) +(declare (uses mtargs.import)) +(declare (uses debugprint)) +(declare (uses debugprint.import)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) + (declare (uses runs)) (declare (uses launch)) (declare (uses server)) -(declare (uses client)) (declare (uses tests)) (declare (uses genexample)) ;; (declare (uses daemon)) (declare (uses db)) @@ -41,39 +49,48 @@ (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) (declare (uses db)) -(declare (uses dbmod)) -(declare (uses dbmod.import)) -(declare (uses commonmod)) -(declare (uses commonmod.import)) (declare (uses dbfile)) (declare (uses dbfile.import)) +(declare (uses dbmod)) +(declare (uses dbmod.import)) +(declare (uses portlogger)) +(declare (uses portlogger.import)) +(declare (uses tcp-transportmod)) +(declare (uses tcp-transportmod.import)) +(declare (uses rmtmod)) +(declare (uses rmtmod.import)) + ;; (declare (uses debugprint)) ;; (declare (uses debugprint.import)) -;; (declare (uses mtargs)) -;; (declare (uses mtargs.import)) ;; (declare (uses ftail)) ;; (import ftail) -(import dbmod +(import (prefix mtargs args:) + debugprint + dbmod commonmod - dbfile) + dbfile + portlogger + tcp-transportmod + rmtmod + ) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "megatest-fossil-hash.scm") -(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) - readline apropos json http-client directory-utils typed-records - http-client srfi-18 extras format) +(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)) +(use readline apropos json http-client directory-utils typed-records) +(use http-client srfi-18 extras format tcp-server tcp) ;; Added for csv stuff - will be removed ;; (use sparse-vectors) @@ -80,11 +97,21 @@ (require-library mutils) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file +;; set some parameters here - these need to be put in something that can be loaded from other +;; executables such as dashboard and mtutil +;; +(include "transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) +(debug:enable-timestamp #t) + + +(set! rmtmod:send-receive rmt:send-receive) + ;;(lambda params (apply rmt:send-receive params))) ;; make send-receive available to rmtmod via parameter + ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) @@ -231,10 +258,11 @@ -ping run-id|host:port : ping server, exit with 0 if found -debug N|N,M,O... : enable debug 0-N or N and M and O ... -debug-noprop N|M,M,O...: enable debug but do not propagate to subprocesses via MT_DEBUG -config fname : override the megatest.config file with fname -append-config fname : append fname to the megatest.config file + -import-sexpr fname : import a sexpr file (use -list-runs % -dumpmode sexpr to create) Utilities -env2file fname : write the environment to fname.csh and fname.sh -envcap a : save current variables labeled as context 'a' in file envdat.db -envdelta a-b : output enviroment delta from context a to context b to -o fname @@ -256,10 +284,11 @@ -list-test-time : list time requered to complete each test in a run. It following following arguments -runname -target -dumpmode -syscheck : do some very basic checks; write access and space in tmp, home, runs, links and is $DISPLAY valid -list-waivers : dump waivers for specified target, runname, testpatt to stdout + -db2db : sync db to db, use -from, -to for dbs, -period and -timeout for continuous sync Diff report -diff-rep : generate diff report (must include -src-target, -src-runname, -target, -runname and either -diff-email or -diff-html) -src-target @@ -305,11 +334,11 @@ ":status" "-status" "-list-runs" "-testdata-csv" "-testpatt" - "--modepatt" + ;; "--modepatt" "-modepatt" "-tagexpr" "-itempatt" "-setlog" "-set-toplog" @@ -318,10 +347,11 @@ "-m" "-rerun" "-days" "-rename-run" + "-from" "-to" "-dest" "-source" "-time-stamp" ;; values and messages @@ -349,10 +379,13 @@ "-env2file" "-envcap" "-envdelta" "-setvars" "-set-state-status" + "-import-sexpr" + "-period" ;; sync period in seconds + "-timeout" ;; exit sync if timeout in seconds exceeded since last change ;; move runs stuff here "-remove-keep" "-set-run-status" "-age" @@ -373,10 +406,11 @@ "-load" ;; load and exectute a scheme file "-section" "-var" "-dumpmode" "-run-id" + "-db" "-ping" "-refdb2dat" "-o" "-log" "-sync-log" @@ -465,10 +499,11 @@ "-convert-to-norm" "-convert-to-old" "-import-megatest.db" "-sync-to-megatest.db" + "-db2db" "-sync-brute-force" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only @@ -582,19 +617,19 @@ ;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not ;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation ;; where (launch:setup) returns #f? ;; -(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server +(if (or (args:get-arg "-log")#;(args:get-arg "-server")) ;; redirect the log always when a server (handle-exceptions exn - (begin - (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - ) + (begin + (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified + (dbname (args:get-arg "-db")) ;; for the server logfile name (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name - (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) + (conc tl "/logs/server-"(or dbname "unk")"-"(current-process-id) "-" (get-host-name) ".log"))) (oup (open-logfile logf))) (if (not (args:get-arg "-log")) (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log (debug:print-info 0 *default-log-port* "Sending log output to " logf) (set! *default-log-port* oup)))) @@ -651,26 +686,13 @@ (process:children #f)) (original-exit exit-code))))) ;; for some switches always print the command to stderr ;; -(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") +(if (args:any-defined? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) -;; some switches imply homehost. Exit here if not on homehost -;; -(let ((homehost-required (list "-cleanup-db"))) - (if (apply args:any? homehost-required) - (if (not (server:choose-server *toppath* 'home?)) - (for-each - (lambda (switch) - (if (args:get-arg switch) - (begin - (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch - ", you can move homehost by removing the .homehost file but this will disrupt any runs in progress.") - (exit 1)))) - homehost-required)))) ;;====================================================================== ;; Misc setup stuff ;;====================================================================== @@ -883,11 +905,13 @@ )) (if (args:get-arg "-ping") (let* ((server-id (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":" (host:port (args:get-arg "-ping"))) - (server:ping (or server-id host:port) #f do-exit: #t))) + (debug:print 0 *default-log-port* "NOT YET REIMPLEMENTED FOR TCP/INMEM") ;; bug + (exit))) + ;; (server:ping (or server-id host:port) #f do-exit: #t))) ;;====================================================================== ;; Capture, save and manipulate environments ;;====================================================================== @@ -934,13 +958,25 @@ ;;====================================================================== ;; Server? Start up here. ;; (if (args:get-arg "-server") - (let ((tl (launch:setup)) - (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) - (server:launch 0 transport-type) + (let* (;; (run-id (args:get-arg "-run-id")) + (dbfname (args:get-arg "-db")) + (tl (launch:setup)) + (keys (keys:config-get-fields *configdat*))) + (case (rmt:transport-mode) + ((tcp) + (let* ((timeout (server:expiration-timeout))) + (debug:print 0 *default-log-port* "INFO: Running using tcp method with server timeout of "timeout) + (tt-server-timeout-param timeout) + (if dbfname + (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys) + (begin + (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.") + (exit 1))))) + (else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode)))) (set! *didsomething* #t))) ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. ;; @@ -950,23 +986,31 @@ (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) + (debug:print 0 *default-log-port* "NOT YET REIMPLEMENTED FOR TCP/INMEM") ;; BUG + (exit) (if tl ;; all roads from here exit (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*)) (fmtstr "~33a~22a~20a~20a~8a\n")) - (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State") - (format #t fmtstr "==" "=========" "=========" "========" "=====") - (for-each ;; ( mod-time host port start-time pid ) + (if (not servers) + (begin + (debug:print-info 1 *default-log-port* "No servers found") + (exit) + ) + ) + (format #t fmtstr "PID" "host:port" "age (hms)" "Last mod" "State") + (format #t fmtstr "===" "=========" "=========" "========" "=====") + (for-each ;; (ip-addr port? mod-time host port start-time pid ) (lambda (server) - (let* ((mtm (any->number (car server))) + (let* ((mtm (any->number (caddr server))) (mod (if mtm (- (current-seconds) mtm) "unk")) - (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds)))) - (url (conc (cadr server) ":" (caddr server))) + (age (- (current-seconds)(or (any->number mtm) (current-seconds)))) (pid (list-ref server 4)) - (alv (if (number? mod)(< mod 10) #f))) + (url (conc (car server) ":" (cadr server))) + (alv (if (number? mod)(< mod 360) #f))) (format #t fmtstr pid url (seconds->hr-min-sec age) @@ -979,11 +1023,10 @@ (server:kill server))))) (sort servers (lambda (a b) (let ((ma (or (any->number (car a)) 9e9)) (mb (or (any->number (car b)) 9e9))) (> ma mb))))) - ;; (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) (exit)))) ;; must do, would have to add checks to many/all calls below @@ -1383,12 +1426,11 @@ ;; IDEA: megatest list -runname blah% ... ;; (if (or (args:get-arg "-list-runs") (args:get-arg "-list-db-targets")) (if (launch:setup) - (let* (;; (dbstruct (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local"))) - (runpatt (args:get-arg "-list-runs")) + (let* ((runpatt (args:get-arg "-list-runs")) (access-mode (db:get-access-mode)) (testpatt (common:args-get-testpatt #f)) ;; (if (args:get-arg "-testpatt") ;; (args:get-arg "-testpatt") ;; "%")) @@ -1433,10 +1475,15 @@ db:test-record-fields t))) (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) ;; '("id")))) (steps-spec (alist-ref "steps" fields-spec equal?)) (test-field-index (make-hash-table))) + (if (and (args:get-arg "-dumpmode") + (not (member (args:get-arg "-dumpmode") '("sexpr" "json" "ods" "list")))) + (begin + (debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list") + (exit))) (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec))) (if (null? invalid-tests-spec) ;; generate the lookup map test-field-name => index-number (let loop ((hed (car adj-tests-spec)) @@ -1488,11 +1535,11 @@ ;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header "id")) targetstr runname "meta" "id" ) ;; (mutils:hierhash-set! data (db:get-value-by-header run header "event_time") targetstr runname "meta" "event_time" ) ;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment") targetstr runname "meta" "comment" ) ;; ;; add last entry twice - seems to be a bug in hierhash? ;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment") targetstr runname "meta" "comment" ) - (else + ((#f list) (if (null? runs-spec) (print "Run: " targetstr "/" runname " status: " (db:get-value-by-header run header "state") " run-id: " run-id ", number tests: " (length tests) " event_time: " (db:get-value-by-header run header "event_time")) @@ -1504,11 +1551,14 @@ (lambda (field-name) (if (equal? field-name "target") (display (conc "target: " targetstr " ")) (display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " ")))) runs-spec) - (newline))))) + (newline)))) + (else + (debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list") + )) (for-each (lambda (test) (common:debug-handle-exceptions #f exn @@ -1856,11 +1906,11 @@ (lambda (target runname keys keyvals) (if (or (string-search "%" target) (string-search "%" runname)) ;; we are being asked to re-run multiple runs (let* ((run-specs (rmt:simple-get-runs runname #f #f target #f))) ;; list of simple-run records (debug:print-info 0 *default-log-port* "Pattern supplied for target or runname with " - (length run-specs) " matches round. Running each in turn.") + (length run-specs) " matches found. Running each in turn.") (if (null? run-specs) (debug:print 0 *default-log-port* "WARNING: No runs match target " target " and runname " runname)) (for-each (lambda (spec) (let* ((precmd (if (args:get-arg "-precmd")(conc (args:get-arg "-precmd") " ") "")) (newcmdline (conc @@ -2054,11 +2104,11 @@ (if (args:get-arg "-extract-ods") (general-run-call "-extract-ods" "Make ods spreadsheet" (lambda (target runname keys keyvals) - (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: #t)) + (let ((dbstruct (make-dbr:dbstruct areapath: *toppath* local: #t)) (outputfile (args:get-arg "-extract-ods")) (runspatt (or (args:get-arg "-runname")(args:get-arg ":runname"))) (pathmod (args:get-arg "-pathmod"))) ;; (keyvalalist (keys->alist keys "%"))) (debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals) @@ -2177,11 +2227,10 @@ (exit 1))) (if (args:get-arg "-runstep")(debug:print-info 1 *default-log-port* "Running -runstep, first change to directory " work-area)) (change-directory work-area) ;; can setup as client for server mode now - ;; (client:setup) (if (args:get-arg "-load-test-data") ;; has sub commands that are rdb: ;; DO NOT put this one into either rmt: or open-run-close (tdb:load-test-data run-id test-id)) @@ -2321,10 +2370,16 @@ (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) + +;; (if (not (server:choose-server *toppath* 'home?)) +;; (begin +;; (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db") +;; (exit 1))) + (let ((dbstructs (db:setup #f))) (common:cleanup-db dbstructs)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") @@ -2379,11 +2434,13 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstructs (if (and toppath - (server:choose-server toppath 'home?)) + ;; NOTE: server:choose-server is starting a server + ;; either add equivalent for tcp mode or ???? + #;(server:choose-server toppath 'home?)) (db:setup #t) #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") @@ -2479,14 +2536,15 @@ 'adj-testids 'old2new ) (set! *didsomething* #t))) -(when (args:get-arg "-sync-brute-force") - (launch:setup) - ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t)) - (set! *didsomething* #t)) +(if (args:get-arg "-import-sexpr") + (begin + (launch:setup) + (rmt:import-sexpr (args:get-arg "-import-sexpr")) + (set! *didsomething* #t))) (if (args:get-arg "-sync-to-megatest.db") (let* ((duh (launch:setup)) (dbstruct (db:setup #t)) (tmpdbpth (dbr:dbstruct-tmppath dbstruct)) @@ -2507,10 +2565,60 @@ (if (args:get-arg "-sync-to") (let ((toppath (launch:setup))) (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to")) (set! *didsomething* #t))) +;; use with -from and -to +;; +(if (args:get-arg "-db2db") + (let* ((duh (launch:setup)) + (src-db (args:get-arg "-from")) + (dest-db (args:get-arg "-to")) + (sync-period (args:get-arg-number "-period")) + (sync-timeout (args:get-arg-number "-timeout")) + ;; (sync-period (if sync-period-in (string->number sync-period-in) #f)) + ;; (sync-timeout (if sync-timeout-in (string->number sync-timeout-in) #f)) + (lockfile (conc dest-db".sync-lock")) + (keys (db:get-keys #f)) + (thesync (lambda (last-update) + (debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...") + (if (not (file-exists? dest-db)) + (begin + (dbfile:with-simple-file-lock + (conc dest-db ".lock") ;; is the db being opened right now? + (lambda () + (debug:print 0 *default-log-port* "Using copy to create "dest-db" from "src-db) + (file-copy src-db dest-db) + 1))) + (let ((res (dbmod:db-to-db-sync src-db dest-db last-update (dbfile:db-init-proc) keys))) + (if res + (debug:print-info 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db) + (debug:print-info 0 *default-log-port* "No sync due to permissions or other issue.")) + res))))) + (if (and src-db dest-db) + (if (file-exists? src-db) + (if (file-exists? lockfile) + (debug:print 0 *default-log-port* "Lock "lockfile" exists, skipping sync...") + (dbfile:with-simple-file-lock + (conc dest-db"-sync-running") + (lambda () + (let loop ((last-changed (current-seconds)) + (last-update 0)) + (let* ((changes (dbfile:with-simple-file-lock + lockfile + (lambda () + (thesync last-update)))) + (now-time (current-seconds))) + (if (and sync-period sync-timeout) ;; + (if (> sync-timeout (- now-time last-changed)) + (begin + (if sync-period (thread-sleep! sync-period)) + (loop (if (> changes 0) now-time last-changed) now-time))))))))) + (debug:print 0 *default-log-port* "No sync due to unreadble or non-existant source file"src-db)) + (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified")) + (set! *didsomething* #t))) + (if (args:get-arg "-list-test-time") (let* ((toppath (launch:setup))) (task:get-test-times) (set! *didsomething* #t))) DELETED mlaunch.scm Index: mlaunch.scm ================================================================== --- mlaunch.scm +++ /dev/null @@ -1,33 +0,0 @@ -;; Copyright 2006-2014, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - -;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') - -;;====================================================================== -;; MLAUNCH -;; -;; take jobs from the given queue and keep launching them keeping -;; the cpu load at the targeted level -;; -;;====================================================================== - -(use sqlite3 srfi-1 posix regex regex-case srfi-69 format) - -(declare (unit mlaunch)) -(declare (uses db)) -(declare (uses common)) - Index: monitor.scm ================================================================== --- monitor.scm +++ monitor.scm @@ -23,10 +23,12 @@ (declare (unit runs)) (declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) +(declare (uses commonmod)) +(import commonmod) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -19,19 +19,23 @@ (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking (srfi 18) posix-extras directory-utils call-with-environment-variables) (import (prefix sqlite3 sqlite3:)) (declare (unit mt)) +(declare (uses debugprint)) (declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses runs)) (declare (uses rmt)) -;; (declare (uses filedb)) +(declare (uses rmtmod)) + +(import debugprint + rmtmod) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") Index: mtargs.scm ================================================================== --- mtargs.scm +++ mtargs.scm @@ -17,7 +17,9 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit mtargs)) + +(use srfi-69) (include "mtargs/mtargs.scm") ADDED mtargs/mtargs.egg Index: mtargs/mtargs.egg ================================================================== --- /dev/null +++ mtargs/mtargs.egg @@ -0,0 +1,7 @@ +((license "LGPL") + (version 0.1) + (category misc) + (dependencies srfi-69 srfi-1) + (author "Matt Welland") + (synopsis "Primitive argument processor.") + (components (extension mtargs))) Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -18,28 +18,45 @@ (module mtargs ( arg-hash get-arg + get-arg-number get-arg-from - usage get-args + usage print-args any-defined? - help - ) - -(import scheme chicken data-structures extras posix ports files) -(use srfi-69 srfi-1) - -(define arg-hash (make-hash-table)) -(define help "") + ) + +(import scheme) ;; gives us cond-expand in chicken-4 + +(cond-expand + (chicken-5 + (import scheme (chicken base) (chicken port) (chicken file) (chicken process-context)) + (import srfi-69 srfi-1)) + (chicken-4 + (import chicken posix srfi-69 srfi-1)) + (else)) + +(define usage (make-parameter print)) +(define arg-hash (make-hash-table)) (define (get-arg arg . default) (if (null? default) (hash-table-ref/default arg-hash arg #f) (hash-table-ref/default arg-hash arg (car default)))) + +;; get an arg as a number +(define (get-arg-number arg . default) + (let* ((val-str (get-arg arg)) + (val (if val-str (string->number val-str) #f))) + (if val + val + (if (null? default) + #f + default)))) (define (any-defined? . args) (not (null? (filter (lambda (x) x) (map get-arg args))))) @@ -48,28 +65,10 @@ (define (get-arg-from ht arg . default) (if (null? default) (hash-table-ref/default ht arg #f) (hash-table-ref/default ht arg (car default)))) -(define (usage . args) - (if (> (length args) 0) - (apply print "ERROR: " args)) - (if (string? help) - (print help) - (print "Usage: " (car (argv)) " ... ")) - (exit 0)) - - ;; one-of args defined -(define (any-defined? . param) - (let ((res #f)) - (for-each - (lambda (arg) - (if (get-arg arg)(set! res #t))) - param) - res)) - -;; args: (define (get-args args params switches arg-hash num-needed) (let* ((numtargs (length args)) (adj-num-needed (if num-needed (+ num-needed 2) #f))) (if (< numtargs (if adj-num-needed adj-num-needed 2)) (if (>= num-needed 1) @@ -94,13 +93,16 @@ (else (if (null? tail)(append remtargs (list arg)) ;; return the non-used args (loop (car tail)(cdr tail)(append remtargs (list arg)))))))) )) -(define (print-args remtargs arg-hash) - (print "ARGS: " remtargs) +(define (print-args arg-hash) (for-each (lambda (arg) (print " " arg " " (hash-table-ref/default arg-hash arg #f))) (hash-table-keys arg-hash))) +(define (any-defined? . args) + (not (null? (filter (lambda (x) x) + (map get-arg args))))) + ) Index: mtexec.scm ================================================================== --- mtexec.scm +++ mtexec.scm @@ -26,13 +26,16 @@ srfi-19 srfi-18 extras format pkts regex regex-case (prefix dbi dbi:) ) ;; (declare (uses common)) -(declare (uses margs)) +(declare (uses mtargs)) (declare (uses configf)) ;; (declare (uses rmt)) +(declare (uses commonmod)) +(import commonmod + (prefix mtargs args:)) ;; (use ducttape-lib) (include "megatest-version.scm") (include "megatest-fossil-hash.scm") Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -13,12 +13,21 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; +; +(declare (uses common)) +(declare (uses mtargs)) +(declare (uses debugprint)) +(declare (uses configf)) +;; (declare (uses rmt)) +(declare (uses commonmod)) +(declare (uses commonmod.import)) -;; (include "common.scm") +(import debugprint) + ; (include "common.scm") (include "megatest-version.scm") ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) @@ -26,14 +35,12 @@ srfi-19 srfi-18 extras format pkts regex regex-case (prefix dbi dbi:) (prefix sqlite3 sqlite3:) nanomsg) -(declare (uses common)) -(declare (uses margs)) -(declare (uses configf)) -;; (declare (uses rmt)) +(import commonmod + (prefix mtargs args:)) (use ducttape-lib) (include "megatest-fossil-hash.scm") Index: newdashboard.scm ================================================================== --- newdashboard.scm +++ newdashboard.scm @@ -16,10 +16,16 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== +(declare (uses common)) +(declare (uses debugprint)) +(declare (uses megatest-version)) +(declare (uses mtargs)) +(declare (uses commonmod)) + (use format) (use (prefix iup iup:)) (use canvas-draw) @@ -26,19 +32,18 @@ (import canvas-draw-iup) (use sql-de-lite srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors ;; defstruct (prefix dbi dbi:)) -(declare (uses common)) -(declare (uses megatest-version)) -(declare (uses margs)) +(import commonmod + debugprint + (prefix mtargs args:)) ;; (declare (uses launch)) ;; (declare (uses gutils)) ;; (declare (uses db)) ;; (declare (uses server)) -;; (declare (uses synchash)) (declare (uses dcommon)) ;; (declare (uses tree)) ;; ;; (include "common_records.scm") ;; (include "db_records.scm") Index: ods.scm ================================================================== --- ods.scm +++ ods.scm @@ -17,10 +17,12 @@ ;; (use csv-xml regex) (declare (unit ods)) (declare (uses common)) +(declare (uses commonmod)) +(import commonmod) (define ods:dirs '("Configurations2" "Configurations2/toolpanel" "Configurations2/menubar" DELETED portlogger-example.scm Index: portlogger-example.scm ================================================================== --- portlogger-example.scm +++ /dev/null @@ -1,21 +0,0 @@ -;; Copyright 2006-2017, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - - -(declare (uses portlogger)) - -(print (apply portlogger:main (cdr (argv)))) Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -15,23 +15,28 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; -(require-extension (srfi 18) extras tcp s11n) - -(use sqlite3 srfi-1 posix srfi-69 hostinfo dot-locking z3) -(import (prefix sqlite3 sqlite3:)) (declare (unit portlogger)) -(declare (uses db)) +(declare (uses debugprint)) +(declare (uses dbmod)) + +(module portlogger +* +(import scheme chicken data-structures) +(import srfi-1 posix srfi-69 hostinfo dot-locking z3 + (srfi 18) extras s11n) +(import (prefix sqlite3 sqlite3:)) +(import debugprint dbmod) ;; lsof -i (define (portlogger:open-db fname) (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away - (exists (common:file-exists? fname)) + (exists (file-exists? fname)) (db (if avail (sqlite3:open-database fname) (begin (system (conc "rm -f " fname)) (sqlite3:open-database fname)))) @@ -44,12 +49,11 @@ ;; port INTEGER PRIMARY KEY, ;; state TEXT DEFAULT 'not-used', ;; fail_count INTEGER DEFAULT 0, ;; update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")))) (sqlite3:set-busy-handler! db handler) - (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; (if (not exists) ;; needed with IF NOT EXISTS? + (sqlite3:execute db "PRAGMA synchronous = 0;") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS ports ( port INTEGER PRIMARY KEY, state TEXT DEFAULT 'not-used', @@ -64,12 +68,12 @@ exn (begin ;; (release-dot-lock fname) (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it + ;; (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) (db (portlogger:open-db fname)) (res (apply proc db params))) (sqlite3:finalize! db) @@ -122,17 +126,12 @@ (or curr var curr)) #f db "SELECT (port) FROM ports WHERE state='released' LIMIT 1;"))) -(define (portlogger:find-port db) - (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport"))) - (if (and val - (string->number val)) - (string->number val) - 32768))) - (portnum (or (portlogger:get-prev-used-port db) +(define (portlogger:find-port db #!optional (lowport 32768)) + (let* ((portnum (or (portlogger:get-prev-used-port db) (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range (random (- 64000 lowport)))))) (handle-exceptions exn (begin @@ -184,5 +183,6 @@ ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed))))) (sqlite3:finalize! db) result)) ;; (print (apply portlogger:main (cdr (argv)))) +) Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -22,10 +22,13 @@ ;; Process convience utils ;;====================================================================== (use regex directory-utils) (declare (unit process)) +(declare (uses debugprint)) + +(import debugprint) (define (process:conservative-read port) (let loop ((res "")) (if (not (eof-object? (peek-char port))) (loop (conc res (read-char port))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -19,17 +19,28 @@ ;;====================================================================== (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) +(declare (uses debugprint)) (declare (uses api)) -(declare (uses http-transport)) +(declare (uses commonmod)) (declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses tcp-transportmod)) (include "common_records.scm") -;; (declare (uses rmtmod)) +(declare (uses rmtmod)) -(import dbfile) ;; rmtmod) +;; used by http-transport +(import dbfile + rmtmod + commonmod + debugprint +;; dbmemmod + dbfile + dbmod + tcp-transportmod) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; @@ -39,64 +50,55 @@ ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== -;; if a server is either running or in the process of starting call client:setup -;; else return #f to let the calling proc know that there is no server available -;; -(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. - (let* ((runremote (or area-dat *runremote*)) - (cinfo (if (remote? runremote) - (remote-conndat runremote) - #f))) - (if cinfo - cinfo - (if (server:check-if-running areapath) - (client:setup areapath) - #f)))) - (define (rmt:on-homehost? runremote) (let* ((hh-dat (remote-hh-dat runremote))) (if (pair? hh-dat) (cdr hh-dat) (begin (debug:print-info 0 *default-log-port* "hh-dat="hh-dat) #f)))) +(define (make-and-init-remote areapath) + (case (rmt:transport-mode) + ((http)(make-remote)) + ((tcp) (tt:make-remote areapath)) + (else #f))) ;;====================================================================== (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - - #;(common:telemetry-log (conc "rmt:"(->string cmd)) - payload: `((rid . ,rid) - (params . ,params))) - - (if (> attemptnum 2) - (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) - - (cond - ((> attemptnum 2) (thread-sleep! 0.05)) - ((> attemptnum 10) (thread-sleep! 0.5)) - ((> attemptnum 20) (thread-sleep! 1))) - (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) - (begin (server:run *toppath*) (thread-sleep! 3))) - - - ;;DOT digraph megatest_state_status { - ;;DOT ranksep=0; - ;;DOT // rankdir=LR; - ;;DOT node [shape="box"]; - ;;DOT "rmt:send-receive" -> MUTEXLOCK; - ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } - ;; do all the prep locked under the rmt-mutex - (mutex-lock! *rmt-mutex*) + (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") + + (if (not (eq? (rmt:transport-mode) 'nfs)) + (begin + (if (> attemptnum 2) + (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) + + (cond + ((> attemptnum 2) (thread-sleep! 0.05)) + ((> attemptnum 10) (thread-sleep! 0.5)) + ((> attemptnum 20) (thread-sleep! 1))) + + ;; I'm turning this off, it may make sense to move it + ;; into http-transport-handler + (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) + (begin + (debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.") + (case (rmt:transport-mode) + ((http) + (server:run *toppath*) + (thread-sleep! 3)) + (else + (thread-sleep! 1) ;; for tcp the server is started by routines in tcp-transportmod. For nfs there is no server + )))))) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; @@ -103,259 +105,33 @@ (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*)) (attemptnum (+ 1 attemptnum)) - (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) - - ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; - ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; - ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - - ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity - ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; - ;; DOT SET_HOMEHOST -> MUTEXLOCK; - ;; ensure we have a homehost record - (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost - (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little - (let ((hh-data (server:choose-server areapath 'homehost))) - (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) - - ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) - (cond - #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds - (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") - (set! *runremote* #f) - ;; BUG: close-connections should go here? - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) - - ;;DOT EXIT; - ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } - ;; give up if more than 150 attempts - ((> attemptnum 150) - (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") - (exit 1)) - - ;;DOT CASE2 [label="local\nreadonly\nquery"]; - ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} - ;;DOT CASE2 -> "rmt:open-qry-close-locally"; - ;; readonly mode, read request- handle it - case 2 - ((and readonly-mode - (member cmd api:read-only-queries)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") - (rmt:open-qry-close-locally cmd 0 params) - ) - - ;;DOT CASE3 [label="write in\nread-only mode"]; - ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} - ;;DOT CASE3 -> "#f"; - ;; readonly mode, write request. Do nothing, return #f - (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) - - ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. - ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. - ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) - ;; - ;;DOT CASE4 [label="reset\nconnection"]; - ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} - ;;DOT CASE4 -> "rmt:send-receive"; - ;; reset the connection if it has been unused too long - ((and runremote - (remote-conndat runremote) - (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on - (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) - (remote-server-timeout runremote)))) - (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") - (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. - (http-transport:close-connections area-dat: runremote) - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE5 [label="local\nread"]; - ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; - ;;DOT CASE5 -> "rmt:open-qry-close-locally"; - - ;; on homehost and this is a read - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (rmt:on-homehost? runremote) - (member cmd api:read-only-queries)) ;; this is a read - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE6 [label="init\nremote"]; - ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; - ;;DOT CASE6 -> "rmt:send-receive"; - ;; on homehost and this is a write, we already have a server, but server has died - - ;; reinstate this keep-alive section but inject a time condition into the (add ... - - #;((and (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write - (remote-server-url runremote) ;; have a server - (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") - (http-transport:close-connections area-dat: runremote) ;; make sure to clean up - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - (rmt:send-receive cmd rid params attemptnum: attemptnum)) - - ;;DOT CASE7 [label="homehost\nwrite"]; - ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; - ;;DOT CASE7 -> "rmt:open-qry-close-locally"; - ;; on homehost and this is a write, we already have a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write - (remote-server-url runremote)) ;; have a server - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") - (rmt:open-qry-close-locally cmd 0 params)) - - ;;DOT CASE8 [label="force\nserver"]; - ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; - ;;DOT CASE8 -> "rmt:open-qry-close-locally"; - ;; on homehost, no server contact made and this is a write, passively start a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required - (cdr (remote-hh-dat runremote)) ;; have homehost - (not (remote-server-url runremote)) ;; no connection yet - (not (member cmd api:read-only-queries))) ;; not a read-only query - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") - (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call - (if server-info - (begin - (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed - (remote-server-id-set! runremote (server:record->id server-info))) - (if (common:force-server?) - (server:start-and-wait *toppath*) - (server:kind-run *toppath*))) - (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") - (rmt:open-qry-close-locally cmd 0 params))) - - ;;DOT CASE9 [label="force server\nnot on homehost"]; - ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; - ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; - ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one - (not (remote-conndat runremote))) - (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost - (not (remote-conndat runremote)))) ;; and no connection - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) - (mutex-unlock! *rmt-mutex*) - (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? - (server:start-and-wait *toppath*)) - (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http - (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as - - ;;DOT CASE10 [label="on homehost"]; - ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; - ;;DOT CASE10 -> "rmt:open-qry-close-locally"; - ;; all set up if get this far, dispatch the query - ((and (not (remote-force-server runremote)) - (cdr (remote-hh-dat runremote))) ;; we are on homehost - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") - (rmt:open-qry-close-locally cmd (if rid rid 0) params)) - - ;;DOT CASE11 [label="send_receive"]; - ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; - ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; - ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; - ;; not on homehost, do server query - (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) - ;;DOT } - -;; No Title -;; Error: (vector-ref) out of range -;; #(# (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299))) -;; 6 -;; -;; Call history: -;; -;; http-transport.scm:306: thread-terminate! -;; http-transport.scm:307: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:259: k587 -;; rmt.scm:259: g591 -;; rmt.scm:276: http-transport:server-dat-update-last-access -;; http-transport.scm:364: current-seconds -;; rmt.scm:282: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:283: mutex-unlock! -;; rmt.scm:287: extras-transport-succeded <-- -;; +-----------------------------------------------------------------------------+ -;; | Exit Status : 70 -;; - -;; bunch of small functions factored out of send-receive to make debug easier -;; - -(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) - ;; (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") - ;; (mutex-lock! *rmt-mutex*) - (let* ((conninfo (remote-conndat runremote)) - (dat-in (case (remote-transport runremote) - ((http) (condition-case ;; handling here has - ;; caused a lot of - ;; problems. However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:client-api-send-receive 0 conninfo cmd params) - ((servermismatch) (vector #f "Server id mismatch" )) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (else - (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") - (exit)))) - (dat (if (and (vector? dat-in) ;; ... check it is a correct size - (> (vector-length dat-in) 1)) - dat-in - (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) - (success (if (vector? dat) (vector-ref dat 0) #f)) - (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time - (begin - (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) - (set! conninfo #f) - (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. - (http-transport:close-connections area-dat: runremote))) - (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) - (mutex-unlock! *rmt-mutex*) - (if success ;; success only tells us that the transport was - ;; successful, have to examine the data to see if - ;; there was a detected issue at the other end - (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (begin - (debug:print-error 0 *default-log-port* " dat=" dat) - (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) - ))) - + (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)) + (testsuite (common:get-testsuite-name)) + (mtexe (common:find-local-megatest))) + + (case (rmt:transport-mode) + ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)) + ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe)) + ((nfs) (nfs-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe)) + ))) + +(define (nfs-transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe) + (let* ((keys (common:get-fields *configdat*)) + (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath))) + (api:dispatch-request dbstruct cmd run-id params))) + +(define (tcp-transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe) + (if (not runremote) + (let* ((newremote (make-and-init-remote areapath))) + (set! *runremote* newremote) + (set! runremote newremote))) + (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id))) + (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))) + (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" (debug:print 18 *default-log-port* "DB Stats\n========") (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) (for-each (lambda (cmd) @@ -392,11 +168,11 @@ res)) (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) (let* ((qry-is-write (not (member cmd api:read-only-queries))) (db-file-path (db:dbfile-path)) ;; 0)) - (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (dbstructs-local (db:setup #t)) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. @@ -432,22 +208,10 @@ (mutex-lock! *db-multi-sync-mutex*) / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) -(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) - (let* ((run-id (if run-id run-id 0)) - (res ;; (handle-exceptions - ;; exn - ;; (begin - ;; (print "transport failed. exn=" exn) - ;; #f) - (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; ) - (if (and res (vector-ref res 0)) - (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! - #f))) - ;;====================================================================== ;; ;; A C T U A L A P I C A L L S ;; ;;====================================================================== @@ -470,21 +234,12 @@ (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; Deprecated for nmsg-transport. ;; -(define (rmt:login-no-auto-client-setup connection-info) - (case *transport-type* ;; run-id of 0 is just a placeholder - ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature)))) - ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))) - )) - -;; hand off a call to one of the db:queries statements -;; added run-id to make looking up the correct db possible -;; -(define (rmt:general-call stmtname run-id . params) - (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) +;; (define (rmt:login-no-auto-client-setup runremote) +;; (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature)))) ;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host (define (rmt:get-latest-host-load hostname) (rmt:send-receive 'get-latest-host-load 0 (list hostname))) @@ -519,13 +274,14 @@ ;;====================================================================== ;; K E Y S ;;====================================================================== ;; These require run-id because the values come from the run! +;; however the query must still apply to main.db ;; (define (rmt:get-key-val-pairs run-id) - (rmt:send-receive 'get-key-val-pairs run-id (list run-id))) + (rmt:send-receive 'get-key-val-pairs #f (list run-id))) (define (rmt:get-keys) (if *db-keys* *db-keys* (let ((res (rmt:send-receive 'get-keys #f '()))) (set! *db-keys* res) @@ -548,81 +304,20 @@ (define (rmt:get-targets) (rmt:send-receive 'get-targets #f '())) (define (rmt:get-target run-id) (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-target run-id (list run-id))) + (rmt:send-receive 'get-target #f (list run-id))) (define (rmt:get-run-times runpatt targetpatt) (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) ;;====================================================================== ;; T E S T S ;;====================================================================== -;; Just some syntatic sugar -(define (rmt:register-test run-id test-name item-path) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:general-call 'register-test run-id run-id test-name item-path)) - -(define (rmt:get-test-id run-id testname item-path) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) - -;; run-id is NOT used -;; -(define (rmt:get-test-info-by-id run-id test-id) - (if (number? test-id) - (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) - (begin - (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) - (print-call-chain (current-error-port)) - #f))) - -(define (rmt:test-get-rundir-from-test-id run-id test-id) - (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) - -(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) - (assert (number? run-id) "FATAL: Run id required.") - (let* ((test-path (if (string? work-area) - work-area - (rmt:test-get-rundir-from-test-id run-id test-id)))) - (debug:print 3 *default-log-port* "TEST PATH: " test-path) - (open-test-db test-path))) - -;; WARNING: This currently bypasses the transaction wrapped writes system -(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment))) - -(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) - -(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - (assert (number? run-id) "FATAL: Run id required.") - ;; (if (number? run-id) - (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))) - ;; (begin - ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) - ;; (print-call-chain (current-error-port)) - ;; '()))) - -(define (rmt:get-tests-for-run-state-status run-id testpatt last-update) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update))) - -;; get stuff via synchash -(define (rmt:synchash-get run-id proc synckey keynum params) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params))) - -(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) - ;; IDEA: Threadify these - they spend a lot of time waiting ... ;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) (let ((multi-run-mutex (make-mutex)) (run-id-list (if run-ids @@ -744,16 +439,10 @@ (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) (assert (number? run-id) "FATAL: Run id required.") (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) -;; state and status are extra hints not usually used in the calculation -;; -(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) - (assert (number? run-id) "FATAL: Run id required.") - (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) - (define (rmt:set-state-status-and-roll-up-run run-id state status) (assert (number? run-id) "FATAL: Run id required.") (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) @@ -829,10 +518,13 @@ (define (rmt:get-run-state run-id) (assert (number? run-id) "FATAL: Run id required.") (rmt:send-receive 'get-run-state #f (list run-id))) +(define (rmt:get-run-state-status run-id) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:send-receive 'get-run-state-status #f (list run-id))) (define (rmt:set-run-status run-id run-status #!key (msg #f)) (assert (number? run-id) "FATAL: Run id required.") (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) @@ -1044,69 +736,42 @@ (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) - -(define (rmtmod:calc-ro-mode runremote *toppath*) - (if (and runremote - (remote-ro-mode-checked runremote)) - (remote-ro-mode runremote) - (let* ((mtcfgfile (conc *toppath* "/megatest.config")) - (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (remote-ro-mode-set! runremote ro-mode) - (remote-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))) - -(define (extras-readonly-mode rmt-mutex log-port cmd params) - (mutex-unlock! rmt-mutex) - (debug:print-info 12 log-port "rmt:send-receive, case 3") - (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) - #f) - -(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) - (mutex-lock! *rmt-mutex*) - (remote-conndat-set! runremote #f) - (http-transport:close-connections area-dat: runremote) - (remote-server-url-set! runremote #f) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - -(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (if (and (vector? res) - (eq? (vector-length res) 2) - (eq? (vector-ref res 1) 'overloaded)) ;; since we are - ;; looking at the - ;; data to carry the - ;; error we'll use a - ;; fairly obtuse - ;; combo to minimise - ;; the chances of - ;; some sort of - ;; collision. this - ;; is the case where - ;; the returned data - ;; is bad or the - ;; server is - ;; overloaded and we - ;; want to ease off - ;; the queries - (let ((wait-delay (+ attemptnum (* attemptnum 10)))) - (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") - (mutex-lock! *rmt-mutex*) - (http-transport:close-connections area-dat: runremote) - (set! *runremote* #f) ;; force starting over - (mutex-unlock! *rmt-mutex*) - (thread-sleep! wait-delay) - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - res)) ;; All good, return res - -#;(set-functions rmt:send-receive remote-server-url-set! - http-transport:close-connections remote-conndat-set! - debug:print debug:print-info - remote-ro-mode remote-ro-mode-set! - remote-ro-mode-checked-set! remote-ro-mode-checked) +(define (rmtmod:calc-ro-mode runremote *toppath*) + (case (rmt:transport-mode) + ((http) + (if (and runremote + (remote-ro-mode-checked runremote)) + (remote-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (remote-ro-mode-set! runremote ro-mode) + (remote-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))) + ((tcp) + (if (and runremote + (tt-ro-mode-checked runremote)) + (tt-ro-mode runremote) + (let* ((mtcfgfile (conc *toppath* "/megatest.config")) + (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (tt-ro-mode-set! runremote ro-mode) + (tt-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))))) + +;;====================================================================== +;; Maintenance +;;====================================================================== + +(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) + (let* ((cfg-deadtime (configf:lookup-number *configdat* "setup" "deadtime")) + (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period"))) + (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) + ;;call end of eud of run detection for posthook + (launch:end-of-run-check run-id))) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -17,69 +17,269 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit rmtmod)) +(declare (uses debugprint)) (declare (uses commonmod)) -(declare (uses apimod)) +(declare (uses dbfile)) ;; needed for records + +;; (declare (uses apimod)) ;; (declare (uses apimod.import)) -(declare (uses ulex)) +;; (declare (uses ulex)) ;; (include "ulex/ulex.scm") (module rmtmod * -(import scheme chicken data-structures extras) +(import scheme chicken data-structures extras matchable srfi-69) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) -(import (prefix commonmod cmod:)) -(import apimod) -(import (prefix ulex ulex:)) +(import commonmod dbfile debugprint) ;; (prefix commonmod cmod:)) +;; (import apimod) +;; (import (prefix ulex ulex:)) + +(include "db_records.scm") (defstruct alldat (areapath #f) (ulexdat #f) ) -;;====================================================================== -;; return the handle struct for sending queries to a specific database -;; - initializes the connection object if this is the first access -;; - finds the "captain" and asks who to talk to for the given dbfname -;; - establishes the connection to the current dbowner -;; -#;(define (rmt:connect alldat dbfname dbtype) - (let* ((ulexdat (or (alldat-ulexdat alldat) - (rmt:setup-ulex alldat)))) - (ulex:connect ulexdat dbfname dbtype))) - -;; setup the remote calls -#;(define (rmt:setup-ulex alldat) - (let* ((udata (ulex:setup))) ;; establish connection to ulex - (alldat-ulexdat-set! alldat udata) - ;; register all needed procs - (ulex:register-handler udata 'ping cmod:get-full-version) ;; override ping with get-full-version - (ulex:register-handler udata 'login cmod:get-full-version) ;; force setup of the connection - (ulex:register-handler udata 'execute api:execute-requests) - udata)) - -;; set up a connection to the current owner of the dbfile associated with rid -;; then send the query to that dbfile owner and wait for a response. -;; -#;(define (rmt:send-receive alldat cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - (let* (;; (alldat *alldat*) - (areapath (alldat-areapath alldat)) - (dbtype (if (or (not rid)(< rid 1)) ;; this is the criteria for "main.db" - "main" "runs")) - (dbfname (if (equal? dbtype "main") - "main.db" - (conc rid ".db"))) - (dbfile (conc areapath "/.db/" dbfname)) - (ulexconn (rmt:connect alldat dbfname dbtype)) ;; ulexconn is our new *runremote*, it is a dbowner struct < pdat lastrefresh > - (udata (alldat-ulexdat alldat))) - (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid params))) - ;; need to call this on the other side - ;; (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) - - #;(with-input-from-string - (ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid (with-output-to-string (lambda ()(serialize params)))) - (lambda ()(deserialize))) +;; hold the send-receive proc in this parameter +(define rmtmod:send-receive #f) ;; (make-parameter #f)) + +;;====================================================================== +;; M I S C +;;====================================================================== + +;; hand off a call to one of the db:queries statements +;; added run-id to make looking up the correct db possible +;; +(define (rmt:general-call stmtname run-id . params) + (rmtmod:send-receive 'general-call run-id (append (list stmtname run-id) params))) + + + +;;====================================================================== +;; import an sexpr file into the db +;;====================================================================== + +(define (rmt:import-sexpr sexpr-file) + (if (file-exists? sexpr-file) + (let* ((data (with-input-from-file sexpr-file read))) + (for-each + (lambda (targ-dat) + (rmt:import-target targ-dat)) ;; ("target" ("run1" ("data" (1 ("field" . "value") ... + data)) + (let* ((msg (conc "ERROR: file "sexpr-file" not found"))) + (debug:print 0 *default-log-port* msg) + (cons #f msg)))) + +(define (rmt:import-target targ-dat) + (let* ((target (car targ-dat)) + (data (cdr targ-dat))) + (for-each + (lambda (run-dat) + (rmt:import-run target run-dat)) ;; ("runname" ("data" ("testid" ("field" . "value") ... + data))) + +(define (rmt:import-run target run-dat) + (let* ((runname (car run-dat)) + (all-dat (cdr run-dat)) + (tests-data (alist-ref "data" all-dat equal?)) + (run-meta (alist-ref "meta" all-dat equal?)) + (run-id (rmt:insert-run target runname run-meta))) + (for-each + (lambda (test-dat) + (let* ((test-id (car test-dat)) + (test-rec (cdr test-dat))) + (rmt:insert-test run-id test-rec))) + tests-data))) + +;; insert run if not there, return id either way +(define (rmt:insert-run target runname run-meta) + ;; look for id, return if found + (debug:print 0 *default-log-port* "Insert run: "target"/"runname) + (let* ((runs (rmtmod:send-receive 'simple-get-runs #f + ;; runpatt count offset target last-update) + (list runname #f #f target #f)))) + (if (null? runs) + (rmtmod:send-receive 'insert-run #f (list target runname run-meta)) + (simple-run-id (car runs))))) + +(define (rmt:insert-test run-id test-rec) + (let* ((testname (alist-ref "testname" test-rec equal?)) + (item-path (alist-ref "item_path" test-rec equal?))) + (debug:print 0 *default-log-port* " Insert test in run "run-id": "testname"/"item-path) + (rmtmod:send-receive 'insert-test run-id test-rec))) + +;;====================================================================== +;; T E S T S +;;====================================================================== + +;; Just some syntatic sugar +(define (rmt:register-test run-id test-name item-path) + (assert (number? run-id) "FATAL: Run id required.") + (rmt:general-call 'register-test run-id run-id test-name item-path)) + +(define (rmt:get-test-id run-id testname item-path) + (assert (number? run-id) "FATAL: Run id required.") + (rmtmod:send-receive 'get-test-id run-id (list run-id testname item-path))) + +(define (rmt:get-test-info-by-id run-id test-id) + (if (number? test-id) + (rmtmod:send-receive 'get-test-info-by-id run-id (list run-id test-id)) + (begin + (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) + (print-call-chain (current-error-port)) + #f))) + +(define (rmt:get-test-state-status-by-id run-id test-id) + (rmtmod:send-receive 'get-test-state-status-by-id run-id (list run-id test-id))) + +(define (rmt:test-get-rundir-from-test-id run-id test-id) + (rmtmod:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) + +;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) +;; (assert (number? run-id) "FATAL: Run id required.") +;; (let* ((test-path (if (string? work-area) +;; work-area +;; (rmt:test-get-rundir-from-test-id run-id test-id)))) +;; (debug:print 3 *default-log-port* "TEST PATH: " test-path) +;; (open-test-db test-path))) + +;; WARNING: This currently bypasses the transaction wrapped writes system +(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) + (assert (number? run-id) "FATAL: Run id required.") + (rmtmod:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment))) + +(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) + (assert (number? run-id) "FATAL: Run id required.") + (rmtmod:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) + +(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) + (assert (number? run-id) "FATAL: Run id required.") + ;; (if (number? run-id) + (rmtmod:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))) + ;; (begin + ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) + ;; (print-call-chain (current-error-port)) + ;; '()))) + +(define (rmt:get-tests-for-run-state-status run-id testpatt last-update) + (assert (number? run-id) "FATAL: Run id required.") + (rmtmod:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update))) + +;; get stuff via synchash +(define (rmt:synchash-get run-id proc synckey keynum params) + (assert (number? run-id) "FATAL: Run id required.") + (rmtmod:send-receive 'synchash-get run-id (list run-id proc synckey keynum params))) + +(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in) + (assert (number? run-id) "FATAL: Run id required.") + (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) + +;; state and status are extra hints not usually used in the calculation +;; +(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) + (assert (number? run-id) "FATAL: Run id required.") + (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) + + +;;====================================================================== +;; Maintenance +;;====================================================================== + + +(define (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime) + (rmtmod:send-receive 'get-toplevels-and-incompletes run-id (list run-id running-deadtime remotehoststart-deadtime))) + +(define (rmt:get-status-from-final-status-file run-dir) + (let ((infile (conc run-dir "/.final-status"))) + ;; first verify we are able to write the output file + (if (not (file-read-access? infile)) + (begin + (debug:print 2 *default-log-port* "ERROR: cannot read " infile) + (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir) + #f + ) + (with-input-from-file infile read-lines) + ))) + +;; select end_time-now from +;; (select testname,item_path,event_time+run_duration as +;; end_time,strftime('%s','now') as now from tests where state in +;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); +;; +;; NOT EASY TO MIGRATE TO db{file,mod} +;; +(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + ;; The default running-deadtime is 720 seconds = 12 minutes. + ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) + (deadtime-trim (or ovr-deadtime cfg-deadtime)) + (server-start-allowance 200) + (server-overloaded-budget 200) + (launch-monitor-off-time (or test-stats-update-period 30)) + (launch-monitor-on-time-budget 30) + (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) + (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) + (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) + (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) + (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period) + + (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) + (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) + + (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime))) + (set! oldlaunched (list-ref dat 1)) + (set! toplevels (list-ref dat 2)) + (set! incompleted (list-ref dat 0))) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " + (length toplevels) " old LAUNCHED toplevel tests and " + (length incompleted) " tests marked RUNNING but apparently dead.") + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + ;; (db:delay-if-busy dbdat) + (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + ;; (launch:is-test-alive "localhost" 435) + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") + " as DEAD") + (for-each + (lambda (test-id) + (let* ((tinfo (rmt:get-test-info-by-id run-id test-id)) + (run-dir (db:test-get-rundir tinfo)) + (host (db:test-get-host tinfo)) + (pid (db:test-get-process_id tinfo)) + (result (rmt:get-status-from-final-status-file run-dir))) + (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "PASS" + "Test stopped responding but it has PASSED; marking it PASS in the DB.")) + (let ((is-alive (and (not (eq? pid 0)) ;; 0 is default in re-used field "attemptnum" where pid stored. + (commonmod:is-test-alive host pid)))) + (if is-alive + (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host + " has a process on pid " pid ", NOT setting to DEAD.") + (begin + (debug:print 0 *default-log-port* "INFO: test " test-id + " final state/status is not COMPLETED/PASS. It is " result) + (rmt:set-state-status-and-roll-up-items + run-id test-id 'foo "COMPLETED" "DEAD" + "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) + ;; call end of eud of run detection for posthook - from merge, is it needed? + ;; (launch:end-of-run-check run-id) + all-ids) + ))))) + + ) Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -22,10 +22,15 @@ (use format directory-utils) (declare (unit runconfig)) (declare (uses common)) +(declare (uses debugprint)) +(declare (uses commonmod)) + +(import commonmod + debugprint) (include "common_records.scm") (define (runconfig:read fname target environ-patt) (let ((ht (make-hash-table))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -15,33 +15,45 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) - posix-extras directory-utils pathname-expand typed-records format sxml-serializer - sxml-modifications matchable) - (declare (unit runs)) (declare (uses db)) (declare (uses common)) +(declare (uses debugprint)) +(declare (uses commonmod)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) (declare (uses archive)) -;; (declare (uses filedb)) +(declare (uses mtargs)) +(declare (uses rmtmod)) +(declare (uses dbfile)) + +(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) + posix-extras directory-utils pathname-expand typed-records format sxml-serializer + sxml-modifications matchable) + + (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") ;; (include "debugger.scm") +(import commonmod + debugprint + rmtmod + dbfile + (prefix mtargs args:)) + ;; use this struct to facilitate refactoring ;; (defstruct runs:dat reglen regfull @@ -801,48 +813,29 @@ (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) - ;; (th1 (make-thread (lambda () - ;; (handle-exceptions - ;; exn - ;; (begin - ;; (print-call-chain) - ;; (print " message: " ((condition-property-accessor 'exn 'message) exn))) - ;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - ;; (any->number reglen) all-tests-registry))) - ;; "runs:run-tests-queue")) - (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? - ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids))) - "runs: mark-incompletes"))) - ;; (thread-start! th1) - (thread-start! th2) - ;; (thread-join! th1) - ;; just do the main stuff in the main thread + (run-ids (rmt:get-all-run-ids))) + #;(for-each (lambda (run-id) + (if keep-going + (handle-exceptions + exn + (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) + (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) + run-ids) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - (any->number reglen) all-tests-registry) + (any->number reglen) all-tests-registry) (set! keep-going #f) - (thread-join! th2) - ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) - ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) - (launch:end-of-run-check run-id))) + (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 *default-log-port* " run-count " run-count) @@ -1279,15 +1272,23 @@ (list hed tal reg reruns)) ;; If no resources are available just kill time and loop again ;; ((not have-resources) ;; simply try again after waiting a second - (if (runs:lownoise "no resources" 60) + (if (runs:lownoise "no resources" 600) (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) + ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. - (thread-sleep! 0.25) + ;; (thread-sleep! 0.25) + + ;; new logic. + ;; If it has been more than 10 seconds since we were last here don't wait at all + ;; otherwise sleep 2 seconds to give db a rest and let dashboard read data + (if (runs:lownoise "frequent-no-resources" 10) + (thread-sleep! 0.25) ;; no significant delay + (thread-sleep! 2)) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (list (car newtal)(cdr newtal) reg reruns)) ;; This is the final stage, everything is in place so launch the test ;; @@ -1544,10 +1545,11 @@ (max-retries (configf:lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) + (incoming-tests '()) ;; queue up incoming tests here to tack on to tal when it gets low ;; (tdbdat (tasks:open-db)) (runsdat (make-runs:dat ;; hed: hed ;; tal: tal ;; reg: reg @@ -1775,11 +1777,11 @@ (last-jobs-check-time (runs:dat-last-jobs-check-time runsdat)) (should-check-jobs (match can-run-more-tests ((can-run-more-flag num-running nr-in-jobgroup max-concurrent-jobs . params) (if (< (- max-concurrent-jobs num-running) 25) (begin - (debug:print-info 0 *default-log-port* + (debug:print-info 2 *default-log-port* "less than 20 jobs headroom, ("max-concurrent-jobs "-"num-running")>20. Forcing prelaunch check.") #t) #f)) (else #f)))) ;; no record yet @@ -1855,14 +1857,22 @@ (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) - (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath + ;; BUG: This next line sucks up a lot of horsepower + ;; (set! tal (append tal (list newtestname))) + ;; (set! tal (cons newtestname tal)) ;; 4/6/2023 - try using cons, does it matter if the test gets added at the beginning? + (set! incoming-tests (cons newtestname incoming-tests)) + )) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) - + (if (and (< (length tal) 20) + (not (null? incoming-tests))) + (begin + (set! tal (append tal (reverse incoming-tests))) + (set! incoming-tests '()))) ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test @@ -2380,11 +2390,11 @@ (bup-mutex (make-mutex)) (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) - (dbfile (conc *toppath* "/.megatest/main.db")) + (dbfile (conc *toppath* "/.mtdb/main.db")) (readonly-mode (not (file-write-access? dbfile)))) (when (and readonly-mode (member action write-access-actions)) (debug:print-error 0 *default-log-port* dbfile " is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") (exit 1))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -14,31 +14,31 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; -(require-extension (srfi 18) extras tcp s11n) - -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest - directory-utils posix-extras matchable utils) - -(use spiffy uri-common intarweb http-client spiffy-request-vars) - (declare (unit server)) (declare (uses commonmod)) - +(declare (uses debugprint)) (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. ;; (declare (uses synchash)) -(declare (uses http-transport)) ;;(declare (uses rpc-transport)) (declare (uses launch)) ;; (declare (uses daemon)) +(declare (uses mtargs)) + +(use (srfi 18) extras s11n) +(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) +(use directory-utils posix-extras matchable utils) +(use spiffy uri-common intarweb http-client spiffy-request-vars) -(import commonmod) +(import commonmod + debugprint + (prefix mtargs args:)) (include "common_records.scm") (include "db_records.scm") (define (server:make-server-url hostport) @@ -65,21 +65,10 @@ ;;====================================================================== ;; Call this to start the actual server ;; -;; all routes though here end in exit ... -;; -;; start_server -;; -(define (server:launch run-id transport-type) - (case transport-type - ((http)(http-transport:launch)) - ;;((nmsg)(nmsg-transport:launch run-id)) - ;;((rpc) (rpc-transport:launch run-id)) - (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) - ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;; Get the transport @@ -112,39 +101,32 @@ (if *server-id* *server-id* (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic (set! *server-id* sig) *server-id*))) -;; When using zmq this would send the message back (two step process) -;; with spiffy or rpc this simply returns the return data to be returned -;; -(define (server:reply return-addr query-sig success/fail result) - (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) - ;; (send-message pubsock target send-more: #t) - ;; (send-message pubsock - (case (server:get-transport) - ((rpc) (db:obj->string (vector success/fail query-sig result))) - ((http) (db:obj->string (vector success/fail query-sig result))) - ((fs) result) - (else - (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) - result))) +;; ;; When using zmq this would send the message back (two step process) +;; ;; with spiffy or rpc this simply returns the return data to be returned +;; ;; +;; (define (server:reply return-addr query-sig success/fail result) +;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) +;; ;; (send-message pubsock target send-more: #t) +;; ;; (send-message pubsock +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) ;; Given an area path, start a server process ### NOTE ### > file 2>&1 ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* (;; (curr-host (get-host-name)) - ;; (attempt-in-progress (server:start-attempted? areapath)) - ;; (dot-server-url (server:check-if-running areapath)) - ;; (curr-ip (server:get-best-guess-address curr-host)) - ;; (curr-pid (current-process-id)) - ;; (homehost (server:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - ;; (target-host (car homehost)) - (testsuite (common:get-testsuite-name)) + (let* ((testsuite (common:get-testsuite-name)) (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") "")) (cmdln (conc (common:get-megatest-exe) " -server - ";; (or target-host "-") @@ -190,46 +172,48 @@ (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f))) (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) - bad-dat) ;; no idea what went wrong, call it a bad server - (with-input-from-file - logf - (lambda () - (let loop ((inl (read-line)) - (lnum 0)) - (if (not (eof-object? inl)) - (let ((mlst (string-match server-rx inl)) - (dbprep (string-match dbprep-rx inl))) - (if dbprep (set! dbprep-found 1)) - (if (not mlst) - (if (< lnum 500) ;; give up if more than 500 lines of server log read - (loop (read-line)(+ lnum 1)) - (begin + exn + (begin + ;; WARNING: this is potentially dangerous to blanket ignore the errors + (if (file-exists? logf) + (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) + bad-dat) ;; no idea what went wrong, call it a bad server + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl))) + (if dbprep (set! dbprep-found 1)) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (begin (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) bad-dat)) - (match mlst - ((_ host port start server-id pid) - (list host - (string->number port) - (string->number start) - server-id - (string->number pid))) - (else - (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) - bad-dat)))) - (begin - (if dbprep-found - (begin - (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) - (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) - bad-dat)))))))) + (match mlst + ((_ host port start server-id pid) + (list host + (string->number port) + (string->number start) + server-id + (string->number pid))) + (else + (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) + bad-dat)))) + (begin + (if dbprep-found + (begin + (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) + (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) + bad-dat)))))))) ;; ;; get a list of servers from the log files, with all relevant data ;; ;; ( mod-time host port start-time pid ) ;; ;; ;; (define (server:get-list areapath #!key (limit #f)) @@ -419,11 +403,12 @@ ;; oldest server alive determines host then choose random of youngest ;; five servers on that host ;; (define (server:get-servers-info areapath) - (let* ((servinfodir (conc *toppath*"/.servinfo"))) + ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") + (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) (if (not (file-exists? servinfodir)) (create-directory servinfodir)) (let* ((allfiles (glob (conc servinfodir"/*"))) (res (make-hash-table))) (for-each @@ -432,15 +417,45 @@ (serverdat (server:logf-get-start-info f))) (match serverdat ((host port start server-id pid) (if (and host port start server-id pid) (hash-table-set! res hostport serverdat) - (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat))) + (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) (else - (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat))))) + (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) allfiles) res))) + +;; check the .servinfo directory, are there other servers running on this +;; or another host? +;; +;; returns #t => ok to start another server +;; #f => not ok to start another server +;; +(define (server:minimal-check areapath) + (server:clean-up-old areapath) + (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) + (servrs (glob (conc srvdir"/*"))) + (thishostip (server:get-best-guess-address (get-host-name))) + (thisservrs (glob (conc srvdir"/"thishostip":*"))) + (homehostinf (server:choose-server areapath 'homehost)) + (havehome (car homehostinf)) + (wearehome (cdr homehostinf))) + (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome + ", numservers: "(length thisservrs)) + (cond + ((not havehome) #t) ;; no homehost yet, go for it + ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another + ((and havehome (not wearehome)) #f) ;; we are not the home host + ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running + (else + (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) + #t)))) + + +(define server-last-start 0) + ;; oldest server alive determines host then choose random of youngest ;; five servers on that host ;; ;; mode: @@ -453,29 +468,47 @@ ;; 1. sort by age ascending and ping until good ;; find alive rand from youngest ;; 1. sort by age descending ;; 2. take five ;; 3. check alive, discard if not and repeat + ;; first we clean up old server files + (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode)) + (server:clean-up-old areapath) + (let* ((since-last (- (current-seconds) server-last-start)) + (server-start-delay 10)) + (if ( < (- (current-seconds) server-last-start) 10 ) + (begin + (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) + (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") + (thread-sleep! server-start-delay) + ) + (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) + ) + ) (let* ((serversdat (server:get-servers-info areapath)) (servkeys (hash-table-keys serversdat)) - (by-time-asc (if (not (null? servkeys)) + (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last (sort servkeys ;; list of "host:port" (lambda (a b) (>= (list-ref (hash-table-ref serversdat a) 2) (list-ref (hash-table-ref serversdat b) 2)))) '()))) + (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) + (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) (if (not (null? by-time-asc)) (let* ((oldest (last by-time-asc)) (oldest-dat (hash-table-ref serversdat oldest)) (host (list-ref oldest-dat 0)) (all-valid (filter (lambda (x) (equal? host (list-ref (hash-table-ref serversdat x) 0))) by-time-asc)) - (best-five (lambda () - (if (> (length all-valid) 5) - (take all-valid 5) - all-valid))) + (best-ten (lambda () + (if (> (length all-valid) 11) + (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out + (if (> (length all-valid) 8) + (drop-right all-valid 1) + all-valid)))) (names->dats (lambda (names) (map (lambda (x) (hash-table-ref serversdat x)) names))) (am-home? (lambda () @@ -483,44 +516,81 @@ (bestadrs (server:get-best-guess-address currhost))) (or (equal? host currhost) (equal? host bestadrs)))))) (case mode ((info) - (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) - (print "youngest: "(hash-table-ref serversdat (car all-valid)))) + (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) + (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid)))) ((home) host) ((homehost) (cons host (am-home?))) ;; shut up old code ((home?) (am-home?)) - ((best-five)(names->dats (best-five))) + ((best-ten)(names->dats (best-ten))) ((all-valid)(names->dats all-valid)) - ((best) (let* ((best-five (best-five)) - (len (length best-five))) - (hash-table-ref serversdat (list-ref best-five (random len))))) + ((best) (let* ((best-ten (best-ten)) + (len (length best-ten))) + (hash-table-ref serversdat (list-ref best-ten (random len))))) ((count)(length all-valid)) (else (debug:print 0 *default-log-port* "ERROR: invalid command "mode) #f))) (begin (server:run areapath) - (thread-sleep! 3) + (set! server-last-start (current-seconds)) + ;; (thread-sleep! 3) (case mode ((homehost) (cons #f #f)) (else #f)))))) + +(define (server:get-servinfo-dir areapath) + (let* ((spath (conc areapath"/.servinfo"))) + (if (not (file-exists? spath)) + (create-directory spath #t)) + spath)) + +(define (server:clean-up-old areapath) + ;; any server file that has not been touched in ten minutes is effectively dead + (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) + (for-each + (lambda (sfile) + (let* ((modtime (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) + (current-seconds)) + (file-modification-time sfile)))) + (if (and (number? modtime) + (> (- (current-seconds) modtime) + 600)) + (begin + (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") + (handle-exceptions + exn + (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) + (delete-file sfile)))))) + sfiles))) ;; would like to eventually get rid of this ;; (define (common:on-homehost?) - (server:choose-server *toppath* 'home?)) + (if (eq? (rmt:transport-mode) 'http) + (server:choose-server *toppath* 'home?) + #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work ;; kind start up of server, wait before allowing another server for a given ;; area to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least seconds old ;; (server:wait-for-server-start-last-flag areapath) - (if (< (server:choose-server areapath 'count) 10) + (let loop () + (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2) + (begin + (if (common:low-noise-print 30 "our-host-load") + (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server.")) + (loop)))) + (if (< (server:choose-server areapath 'count) 20) (server:run areapath)) #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((lock-file (conc areapath "/logs/server-start.lock"))) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 25) @@ -538,11 +608,12 @@ (let loop ((server-info (server:check-if-running areapath)) (try-num 0)) (if (or server-info (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. (server:record->url server-info) - (let ((num-ok (length (server:choose-server areapath 'all-valid)))) + (let* ( (servers (server:choose-server areapath 'all-valid)) + (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0))) (if (and (> try-num 0) ;; first time through simply wait a little while then try again (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one (server:run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath) @@ -555,11 +626,11 @@ ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; (define (server:check-if-running areapath) ;; #!key (numservers "2")) (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed - (servers (server:choose-server areapath 'best-five))) ;; (server:get-best (server:get-list areapath)))) + (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath)))) (if (or (and servers (null? servers)) (not servers)) ;; (and (list? servers) ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers @@ -587,92 +658,91 @@ (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) #f) - (match-let (((mod-time hostname port start-time server-id pid) + (match-let (((hostname port start-time server-id pid) servr)) (tasks:kill-server hostname pid)))) -;; called in megatest.scm, host-port is string hostname:port -;; -;; NOTE: This is NOT called directly from clients as not all transports support a client running -;; in the same process as the server. -;; -(define (server:ping host-port-in server-id #!key (do-exit #f)) - (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - #f ;; (server:check-if-running *toppath*) - ;; (if (number? host-port-in) ;; we were handed a server-id - ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; ;; (print "srec: " srec " host-port-in: " host-port-in) - ;; (if srec - ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - ;; (conc "no such server-id " host-port-in))) - host-port-in))) ;; ) - (let* ((host-port (if host:port - (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f)) - #f))) -;; (toppath (launch:setup))) - ;; (print "host-port=" host-port) - (if (not host-port) - (begin - (if host-port-in - (debug:print 0 *default-log-port* "ERROR: bad host:port")) - (if do-exit (exit 1)) - #f) - (let* ((iface (car host-port)) - (port (cadr host-port)) - (server-dat (http-transport:client-connect iface port server-id)) - (login-res (rmt:login-no-auto-client-setup server-dat))) - (if (and (list? login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f))))))) - -;; run ping in separate process, safest way in some cases -;; -(define (server:ping-server ifaceport) - (with-input-from-pipe - (conc (common:get-megatest-exe) " -ping " ifaceport) - (lambda () - (let loop ((inl (read-line)) - (res "NOREPLY")) - (if (eof-object? inl) - (case (string->symbol res) - ((NOREPLY) #f) - ((LOGIN_OK) #t) - (else #f)) - (loop (read-line) inl)))))) - -;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). -;; -(define (server:login toppath) - (lambda (toppath) - (set! *db-last-access* (current-seconds)) ;; might not be needed. - (if (equal? *toppath* toppath) - #t - #f))) +;; ;; called in megatest.scm, host-port is string hostname:port +;; ;; +;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running +;; ;; in the same process as the server. +;; ;; +;; (define (server:ping host:port server-id #!key (do-exit #f)) +;; (let* ((host-port (cond +;; ((string? host:port) +;; (let ((slst (string-split host:port ":"))) +;; (if (eq? (length slst) 2) +;; (list (car slst)(string->number (cadr slst))) +;; #f))) +;; (else +;; #f)))) +;; (cond +;; ((and (list? host-port) +;; (eq? (length host-port) 2)) +;; (let* ((myrunremote (make-and-init-remote *toppath*)) +;; (iface (car host-port)) +;; (port (cadr host-port)) +;; (server-dat (client:connect iface port server-id myrunremote)) +;; (login-res (rmt:login-no-auto-client-setup myrunremote))) +;; (http-transport:close-connections myrunremote) +;; (if (and (list? login-res) +;; (car login-res)) +;; (begin +;; ;; (print "LOGIN_OK") +;; (if do-exit (exit 0)) +;; #t) +;; (begin +;; ;; (print "LOGIN_FAILED") +;; (if do-exit (exit 1)) +;; #f)))) +;; (else +;; (if host:port +;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) +;; (if do-exit +;; (exit 1) +;; #f))))) +;; +;; ;; run ping in separate process, safest way in some cases +;; ;; +;; (define (server:ping-server ifaceport) +;; (with-input-from-pipe +;; (conc (common:get-megatest-exe) " -ping " ifaceport) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res "NOREPLY")) +;; (if (eof-object? inl) +;; (case (string->symbol res) +;; ((NOREPLY) #f) +;; ((LOGIN_OK) #t) +;; (else #f)) +;; (loop (read-line) inl)))))) +;; +;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; ;; +;; (define (server:login toppath) +;; (lambda (toppath) +;; (set! *db-last-access* (current-seconds)) ;; might not be needed. +;; (if (equal? *toppath* toppath) +;; #t +;; #f))) ;; timeout is hms string: 1h 5m 3s, default is 1 minute ;; This is currently broken. Just use the number of hours with no unit. ;; Default is 60 seconds. ;; (define (server:expiration-timeout) - (let ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (and (string? tmo) - (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below - (* 3600 (string->number tmo)) - 60))) + (let* ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (string? tmo) + (let* ((num (string->number tmo))) + (if num + (* 3600 num) + (common:hms-string->seconds tmo))) + 600 ;; this is the default + ))) (define (server:get-best-guess-address hostname) (let ((res #f)) (for-each (lambda (adr) @@ -683,125 +753,13 @@ (string-intersperse (map number->string (u8vector->list (if res res (hostname->ip hostname)))) "."))) -;; (define server:sync-lock-token "SERVER_SYNC_LOCK") -;; (define (server:release-sync-lock) -;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -;; (define (server:have-sync-lock?) -;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) -;; (have-lock? (car have-lock-pair)) -;; (lock-time (cdr have-lock-pair)) -;; (lock-age (- (current-seconds) lock-time))) -;; (cond -;; (have-lock? #t) -;; ((>lock-age -;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) -;; (server:release-sync-lock) -;; (server:have-sync-lock?)) -;; (else #f)))) - ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") (lambda () - (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")) - #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh - (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) - (tmp-area (common:get-db-tmp-area)) - (tmp-db (conc tmp-area "/megatest.db")) - (staging-file (conc *toppath* "/.megatest.db")) - (mtdbfile (conc *toppath* "/megatest.db")) - (lockfile (common:get-sync-lock-filepath)) - (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (sync-cmd (if fork-to-background - (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") - sync-cmd-core)) - (default-min-intersync-delay 2) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) - (default-duty-cycle 0.1) - (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) - (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) - (calculate-off-time (lambda (work-duration duty-cycle) - (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) - (off-time min-intersync-delay) ;; adjusted in closure below. - (do-a-sync - (lambda () - (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) - (let* ((finalres - (let retry-loop ((num-tries 0)) - (if (common:simple-file-lock lockfile) - (begin - (cond - ((not (or fork-to-background persist-until-sync)) - (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay - " , off-time="off-time" seconds ]") - (thread-sleep! (max off-time min-intersync-delay))) - (else - (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd)) - (dbbackupfile (conc mtdbfile ".backup")) - (res2 - (cond - ((eq? 0 res ) - (handle-exceptions - exn - #f - (if (file-exists? dbbackupfile) - (delete-file* dbbackupfile) - ) - (if (eq? 0 (file-size sync-log)) - (delete-file* sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - - (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) - (set! off-time (calculate-off-time - last-sync-seconds - (cond - ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) - duty-cycle) - (else - (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) - default-duty-cycle)))) - - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) - 'sync-completed)) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) - #f)))) - (common:simple-file-release-lock lockfile) - (BB> "released lockfile: " lockfile) - (when (common:file-exists? lockfile) - (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) - res2) ;; end let - );; end begin - ;; else - (cond - (persist-until-sync - (thread-sleep! 1) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") - (retry-loop (add1 num-tries))) - (else - (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - 'parallel-sync-in-progress)) - ) ;; end if got lockfile - ) - )) - (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) - finalres) - ) ;; end lambda - )) - do-a-sync)) + (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))) Index: subrun.scm ================================================================== --- subrun.scm +++ subrun.scm @@ -15,25 +15,24 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + +(declare (unit subrun)) +(declare (uses debugprint)) +(declare (uses db)) +(declare (uses common)) +(declare (uses commonmod)) +(declare (uses mt)) (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format call-with-environment-variables) -(declare (unit subrun)) -;;(declare (uses runs)) -(declare (uses db)) -(declare (uses common)) -;;(declare (uses items)) -;;(declare (uses runconfig)) -;;(declare (uses tests)) -;;(declare (uses server)) -(declare (uses mt)) -;;(declare (uses archive)) -;; (declare (uses filedb)) + +(import commonmod + debugprint) ;(include "common_records.scm") ;;(include "key_records.scm") (include "db_records.scm") ;; provides db:test-get-id ;;(include "run_records.scm") @@ -135,11 +134,11 @@ (subrun:unset-subrun-removed test-run-dir)) (let* ((log-prefix "run") (switches (subrun:selector+log-switches test-run-dir log-prefix)) (run-wait (equal? run-mode "yes")) - (cmd (conc "megatest " sub-cmd " " switches" " + (cmd (conc (common:get-mtexe)" "sub-cmd" "switches" " (if run-wait "-run-wait " "")))) cmd)) (define (subrun:sanitize-path inpath) @@ -232,20 +231,24 @@ (list (car x) (cdr x))) switch-alist)) " "))) res)) +;; NOTE: Here we run sub megatest but this is not intended for one version +;; of megatest to test another version. Thus we propagate the (define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) - (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) - (cmd (conc "megatest " selector-switches " " action-switches-str )) + (let* ((mtpathdir (common:get-megatest-exe-dir)) + (mtexe (common:get-mtexe)) + (selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) + (cmd (conc mtexe" "selector-switches" "action-switches-str )) (pid #f) (proc (lambda () (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (list (cons "PATH" (common:get-megatest-exe-path))) (lambda () (common:without-vars proc "^MT_.*"))) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (if (eq? pid-val 0) DELETED synchash.scm Index: synchash.scm ================================================================== --- synchash.scm +++ /dev/null @@ -1,133 +0,0 @@ -;;====================================================================== -;; Copyright 2006-2012, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . -;; -;;====================================================================== - -;;====================================================================== -;; A hash of hashes that can be kept in sync by sending minial deltas -;;====================================================================== - -(use format) -(use srfi-1 srfi-69 sqlite3) -(import (prefix sqlite3 sqlite3:)) - -(declare (unit synchash)) -(declare (uses db)) -(declare (uses server)) -(include "db_records.scm") - -(define (synchash:make) - (make-hash-table)) - -;; given an alist of objects '((id obj) ...) -;; 1. remove unchanged objects from the list -;; 2. create a list of removed objects by id -;; 3. remove removed objects from synchash -;; 4. replace or add new or changed objects to synchash -;; -(define (synchash:get-delta indat synchash) - (let ((deleted '()) - (changed '()) - (found '()) - (orig-keys (hash-table-keys synchash))) - (for-each - (lambda (item) - (let* ((id (car item)) - (dat (cadr item)) - (ref (hash-table-ref/default synchash id #f))) - (if (not (equal? dat ref)) ;; item changed or new - (begin - (set! changed (cons item changed)) - (hash-table-set! synchash id dat))) - (set! found (cons id found)))) - indat) - (for-each - (lambda (id) - (if (not (member id found)) - (begin - (set! deleted (cons id deleted)) - (hash-table-delete! synchash id)))) - orig-keys) - (list changed deleted) - ;; (list indat '()) ;; just for debugging - )) - -;; keynum => the field to use as the unique key (usually 0 but can be other field) -;; -(define (synchash:client-get proc synckey keynum synchash run-id . params) - (let* ((data (rmt:synchash-get run-id proc synckey keynum params)) - (newdat (car data)) - (removs (cadr data)) - (myhash (hash-table-ref/default synchash synckey #f))) - (if (not myhash) - (begin - (set! myhash (make-hash-table)) - (hash-table-set! synchash synckey myhash))) - (for-each - (lambda (item) - (let ((id (car item)) - (dat (cadr item))) - ;; (debug:print-info 2 *default-log-port* "Processing item: " item) - (hash-table-set! myhash id dat))) - newdat) - (for-each - (lambda (id) - (hash-table-delete! myhash id)) - removs) - ;; WHICH ONE!? - ;; data)) ;; return the changed and deleted list - (list newdat removs))) ;; synchash)) - -(define *synchashes* (make-hash-table)) - -(define (synchash:server-get dbstruct run-id proc synckey keynum params) - ;; (debug:print-info 2 *default-log-port* "synckey: " synckey ", keynum: " keynum ", params: " params) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (synchash (hash-table-ref/default *synchashes* synckey #f)) - (newdat (apply (case proc - ((db:get-runs) db:get-runs) - ((db:get-tests-for-run-mindata) db:get-tests-for-run-mindata) - ((db:get-test-info-by-ids) db:get-test-info-by-ids) - (else - (print "ERROR: sync for hash " proc " not setup! Edits needed in synchash.scm") - print)) - db params)) - (postdat #f) - (make-indexed (lambda (x) - (list (vector-ref x keynum) x)))) - ;; Now process newdat based on the query type - (set! postdat (case proc - ((db:get-runs) - ;; (debug:print-info 2 *default-log-port* "Get runs call") - (let ((header (vector-ref newdat 0)) - (data (vector-ref newdat 1))) - ;; (debug:print-info 2 *default-log-port* "header: " header ", data: " data) - (cons (list "header" header) ;; add the header keyed by the word "header" - (map make-indexed data)))) ;; add each element keyed by the keynum'th val - (else - ;; (debug:print-info 2 *default-log-port* "Non-get runs call") - (map make-indexed newdat)))) - ;; (debug:print-info 2 *default-log-port* "postdat: " postdat) - ;; (if (not indb)(sqlite3:finalize! db)) - (if (not synchash) - (begin - (set! synchash (make-hash-table)) - (hash-table-set! *synchashes* synckey synchash))) - (synchash:get-delta postdat synchash))) - Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -16,19 +16,30 @@ ;; along with Megatest. If not, see . ;; ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') +(declare (unit tasks)) +(declare (uses debugprint)) +(declare (uses dbfile)) +(declare (uses db)) +(declare (uses dbmod)) +(declare (uses rmt)) +(declare (uses rmtmod)) +(declare (uses common)) +(declare (uses pgdb)) +(declare (uses commonmod)) +(declare (uses mtargs)) + (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) (import (prefix sqlite3 sqlite3:)) -(declare (unit tasks)) -(declare (uses dbfile)) -(declare (uses db)) -(declare (uses rmt)) -(declare (uses common)) -(declare (uses pgdb)) +(import commonmod + debugprint + dbmod + rmtmod + (prefix mtargs args:)) (import dbfile) ;; (import pgdb) ;; pgdb is a module (include "task_records.scm") @@ -36,42 +47,10 @@ ;;====================================================================== ;; Tasks db ;;====================================================================== -;; wait up to aprox n seconds for a journal to go away -;; -(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) - (if (not (string? path)) - (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)") - (let ((fullpath (conc path "-journal"))) - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* " exn=" (condition->list exn)) - (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") - #t) ;; if stuff goes wrong just allow it to move on - (let loop ((journal-exists (common:file-exists? fullpath)) - (count n)) ;; wait ten times ... - (if journal-exists - (begin - (if (and waiting-msg - (eq? (modulo n 30) 0)) - (debug:print 0 *default-log-port* waiting-msg)) - (if (> count 0) - (begin - (thread-sleep! 1) - (loop (common:file-exists? fullpath) - (- count 1))) - (begin - (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") - (if remove (system (conc "rm -rf " fullpath))) - #f))) - #t)))))) - (define (tasks:get-task-db-path) (let ((dbdir (or (configf:lookup *configdat* "setup" "monitordir") (configf:lookup *configdat* "setup" "dbdir") (conc (common:get-linktree) "/.db")))) (handle-exceptions @@ -1077,13 +1056,10 @@ (changed (if (and target run-name) (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt) (rmt:get-changed-record-ids last-sync-time))) (run-ids (alist-ref 'runs changed)) (test-ids (alist-ref 'tests changed)) - (test-step-ids (alist-ref 'test_steps changed)) - (test-data-ids (alist-ref 'test_data changed)) - (run-stat-ids (alist-ref 'run_stats changed)) (area-tag (if (args:get-arg "-area-tag") (args:get-arg "-area-tag") (if (args:get-arg "-area") (args:get-arg "-area") "")))) @@ -1100,13 +1076,10 @@ (if (not (null? test-ids)) (begin (debug:print-info 0 *default-log-port* "syncing tests: " test-ids) (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time) (debug:print-info 0 *default-log-port* "syncing test steps") - (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time) - (debug:print-info 0 *default-log-port* "syncing test data") - (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) ) ) (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds)))) (debug:print-info 0 "smallest-time :" smallest-time " last-sync-time " last-sync-time) (if (not (and target run-name)) Index: tcmt.scm ================================================================== --- tcmt.scm +++ tcmt.scm @@ -21,19 +21,25 @@ ;; ;; 1. Run the megatest process and pass it all the needed parameters ;; 2. Every five seconds check for state/status changes and print the info ;; +(declare (uses mtargs)) +(declare (uses rmt)) +(declare (uses rmtmod)) +(declare (uses common)) +;; (declare (uses megatest-version)) +(declare (uses commonmod)) + (use srfi-1 posix srfi-69 srfi-18 regex defstruct) (use trace) ;; (trace-call-sites #t) -(declare (uses margs)) -(declare (uses rmt)) -(declare (uses common)) -;; (declare (uses megatest-version)) +(import commonmod + rmtmod + (prefix mtargs args:)) (include "megatest-version.scm") (include "megatest-fossil-hash.scm") (include "db_records.scm") ADDED tcp-transportmod.scm Index: tcp-transportmod.scm ================================================================== --- /dev/null +++ tcp-transportmod.scm @@ -0,0 +1,933 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit tcp-transportmod)) +(declare (uses debugprint)) +(declare (uses commonmod)) +(declare (uses dbfile)) +(declare (uses dbmod)) +(declare (uses portlogger)) + +(use address-info tcp) + +(module tcp-transportmod + * + + (import scheme + (prefix sqlite3 sqlite3:) + chicken + data-structures + + address-info + directory-utils + extras + files + hostinfo + matchable + md5 + message-digest + ports + posix + regex + regex-case + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + stack + typed-records + tcp-server + tcp + + debugprint + commonmod + dbfile + dbmod + portlogger + ) + +;;====================================================================== +;; client +;;====================================================================== + +;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic + +;; Used ONLY for client +;; +(defstruct tt-conn + host + port + host-port + dbfname + server-id + server-start + servinf-file + pid +) + +;; Used for BOTH clients and servers +(defstruct tt + ;; client related + (conns (make-hash-table)) ;; dbfname -> conn + + ;; server related + (state 'starting) + (areapath #f) + (host #f) + (port #f) + (conn #f) + (cleanup-proc #f) + (handler #f) ;; receives data and responds + (socket #f) + (thread #f) + (host-port #f) + (cmd-thread #f) + (ro-mode #f) + (ro-mode-checked #f) + (last-access (current-seconds)) + (servinf-file #f) + (last-serv-start 0) + ) + +;; parameters +;; +(define tt-server-timeout-param (make-parameter 600)) +(define tt-server-profile-string (make-parameter "")) + +;; make ttdat visible +(define *server-info* #f) + +(define (tt:make-remote areapath) + (make-tt areapath: areapath)) + +;; 1 ... or #f +;; and check that dbfname matches. FIXME: the propagation of dbfname and run-id +;; might not make the best sense +;; +(define (tt:valid-run-id run-id dbfname) + (and (or (number? run-id) + (not run-id)) + (equal? (dbfile:run-id->dbfname run-id) dbfname))) + +(tcp-buffer-size 2048) +;; (max-connections 4096) + +;; do all the busy work of finding and setting up conn for +;; connecting to a server +;; +(define (tt:client-connect-to-server ttdat dbfname run-id testsuite) + (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id) + (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) + (server-start-proc (lambda () + (tt:server-process-run + (tt-areapath ttdat) + testsuite ;; (dbfile:testsuite-name) + (common:find-local-megatest) + run-id)))) + (if conn + (begin + ; (debug:print-info 0 *default-log-port* "already connected to the server") + conn) ;; we are already connected to the server + (let* ((sdat (tt:get-current-server-info ttdat dbfname))) + (match sdat + ((host port start-time server-id pid dbfname2 servinffile) + (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.") + ;(debug:print-info 0 *default-log-port* "in match servinffile:" servinffile) + (let* ((host-port (conc host":"port)) + (conn (make-tt-conn + host: host + port: port + host-port: host-port + dbfname: dbfname + servinf-file: servinffile + server-id: server-id + server-start: start-time + pid: pid))) + ;; verify we can talk to this server + (let* ((result (tt:timed-ping host port server-id)) + (ping-res (car result)) + (ping (cdr result))) + (debug:print-info 0 *default-log-port* "ping time: " ping) + (case ping-res + ((running) + (hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good? + conn) + ((starting) + (thread-sleep! 0.5) + (tt:client-connect-to-server ttdat dbfname run-id testsuite)) + (else + (let* ((curr-secs (current-seconds))) + ;; rm the (last server) would go here + (if (> (- curr-secs (tt-last-serv-start ttdat)) 10) + (begin + (tt-last-serv-start-set! ttdat curr-secs) + (server-start-proc))) ;; start server if 30 sec since last attempt + (thread-sleep! 1) + (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) + (else ;; no good server found, if haven't started server in > 5 secs, start another + (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; BUG - grow this number really do not want to swamp the machine with servers + (begin + (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname) + (server-start-proc) + (tt-last-serv-start-set! ttdat (current-seconds)))) + (thread-sleep! 1) + (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) + +(define (tt:timed-ping host port server-id) + (let* ((start-time (current-milliseconds)) + (result (tt:ping host port server-id))) + (cons result (- (current-milliseconds) start-time)))) + + +(define (tt:ping host port server-id #!optional (tries-left 5)) + (let* ((res (tt:send-receive-direct host port `(ping #f #f #f) ping-mode: #t)) ;; please send me your server-id + (try-again (lambda () + (if (> tries-left 0) + (begin + (thread-sleep! 1) + (tt:ping host port server-id (- tries-left 1))) + #f)))) + ;; + ;; need two threads, one a 5 second timer + ;; + (match res + ((status errmsg result meta) + (if (equal? result server-id) + (let* ((server-state (alist-ref 'sstate meta))) + ;; (debug:print 0 *default-log-port* "Ping to "host":"port" successful.") + (or server-state 'unk)) ;; then we are good + (begin + (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result) + #f))) + (else + ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res) + (try-again))))) + +;; client side handler +;; +;;(tt:handler # get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") +;; +(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) + ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. + (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) + (if conn + ;; have connection, call the server + (let* ((res (tt:send-receive ttdat conn cmd run-id params))) + ;; res is (status errmsg result meta) + ; (debug:print 0 *default-log-port* "conn:" conn " res: " res) + (match res + ((status errmsg result meta) + (if (list? meta) + (let* ((delay-wait (alist-ref 'delay-wait meta))) + (if (and (number? delay-wait) + (> delay-wait 0)) + (begin + (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds") + (thread-sleep! delay-wait))))) + (case status + ((busy) ;; result will be how long the server wants you to delay + (let* ((dly (if (number? result) result 0.1))) + (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is busy, will try again in "dly" seconds.") + (thread-sleep! dly) + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))) + ((loaded) + (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, slowing queries.") + (tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn)) + result) ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) + (else + result))) + (else ;; did not receive properly formated result + (if (not res) ;; tt:handler is telling us that communication failed + (let* ((host (tt-conn-host conn)) + (port (tt-conn-port conn)) + ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db + (pid (tt-conn-pid conn)) + (servinf (tt-conn-servinf-file conn))) + ;;(servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath) + (hash-table-set! (tt-conns ttdat) dbfname #f) + (if (and servinf (file-exists? servinf)) + (begin + (if (< attemptnum 10) + (begin + (thread-sleep! 0.5) + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) + (begin + (debug:print 0 *default-log-port* "INFO: no response from server "host":"port" for "dbfname) + (if (and (file-exists? servinf) + (> (- (current-seconds)(file-modification-time servinf)) 60)) + (begin + (debug:print 0 *default-log-port* "INFO: "servinf" file seems old and no ping response, removing it.") + (handle-exceptions + exn + #f + (delete-file* servinf)) + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) + (begin + ;; start server - addressed in client-connect-to-server + ;; delay - addressed in client-connect-to-server + ;; try again + (thread-sleep! 0.25) ;; dunno, I think this needs to be here + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) + )))) + (begin ;; no server file, delay and try again + (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf) + (thread-sleep! 0.5) + (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)))) + (begin ;; this case is where res is malformed. Probably should abort + (assert #f "FATAL: tt:handler received bad data "res) + ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.") + ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe) + ))))) + (begin + (thread-sleep! 1) ;; no conn yet set up, give it a rest and try again + (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))))) + +(define (tt:bid-for-servership run-id) + #f) + +;; gets server info and appends path to server file +;; sorts by age, oldest first +;; +;; returns list of (host port startseconds server-id servinfofile) +;; +(define (tt:get-server-info-sorted ttdat dbfname) + (let* ((areapath (tt-areapath ttdat)) + (sfiles (tt:find-server areapath dbfname)) + (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read + (sorted (sort sdats (lambda (a b) + (let* ((starta (list-ref a 2)) + (startb (list-ref b 2))) + (if (eq? starta startb) + (string>? (list-ref a 3)(list-ref b 3)) ;; if servers started at same time look at server-id + (< starta startb)))))) + (count 0)) + (for-each + (lambda (rec) + (if (or (> (length sorted) 1) + (common:low-noise-print 120 "server info sorted")) + (debug:print 0 *default-log-port* "SERVER #"count": "(string-intersperse (map conc sorted) ", "))) + (set! count (+ count 1))) + sorted) + sorted)) + +(define (tt:get-current-server-info ttdat dbfname) + (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.") + ;; + ;; TODO - replace most of below with tt;get-server-info-sorted + ;; + (let* ((areapath (tt-areapath ttdat)) + (sfiles (tt:find-server areapath dbfname)) + (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read + (sorted (sort sdats (lambda (a b) + (< (list-ref a 2)(list-ref b 2)))))) + (if (null? sorted) + #f ;; we'll want to wait until extra servers have exited + (car sorted)))) + +(define (tt:send-receive ttdat conn cmd run-id params) + (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn))) + (host (tt-conn-host conn)) + (port (tt-conn-port conn)) + (dat (list cmd run-id params #f))) ;; no meta data yet + (tt:send-receive-direct host port dat))) + +(defstruct tt:backoff + (last-ioerr (current-seconds)) + (last-adj-t (current-seconds)) + (wait-delay 0.1)) + +(define *tt:backoff-smoothing* (make-hash-table)) ;; host:port => lastaccess backoffdelay ) + +(define (tt:backoff-incr host port) ;; call if tcp fails i/o net + (let* ((host-port (conc host":"port)) + (bkoff (hash-table-ref/default *tt:backoff-smoothing* host-port #f))) + (if bkoff + (begin + (tt:backoff-last-ioerr-set! bkoff (current-seconds)) + (tt:backoff-wait-delay-set! bkoff (+ (tt:backoff-wait-delay bkoff) 0.1))) + (hash-table-set! *tt:backoff-smoothing* host-port (make-tt:backoff))))) + +(define (tt:backoff-decr-and-wait host port) + (let* ((host-port (conc host":"port)) + (bkoff (hash-table-ref/default *tt:backoff-smoothing* host-port #f))) + (if bkoff + (let* ((wait-delay (tt:backoff-wait-delay bkoff)) + (last-ioerr (tt:backoff-last-ioerr bkoff)) + (last-adj-t (tt:backoff-last-adj-t bkoff)) + (delta (- (current-seconds) last-adj-t)) + (adj (* delta 0.001)) ;; it takes 100 seconds to recover from hitting an io err + (new-wait (if (> wait-delay 0) + (if (> adj wait-delay) + 0 + (- wait-delay adj)) + 0))) + (if (> new-wait 0) + (begin + (if (common:low-noise-print 10 "delay wait message") + (debug:print-info 0 *default-log-port* "Server loaded, DelayWait: "new-wait)) + (tt:backoff-wait-delay-set! bkoff new-wait) + (tt:backoff-last-adj-t-set! bkoff (current-seconds)) + (thread-sleep! new-wait)) + (hash-table-delete! *tt:backoff-smoothing* host-port)))))) + +(define (tt:send-receive-direct host port dat #!key (ping-mode #f)(tries-remaining 25)) + (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port) + (tt:backoff-decr-and-wait host port) + (let* ((retry (lambda () + (tt:send-receive-direct host port dat tries-remaining: (- tries-remaining 1)))) + (full-err-print (lambda (exn msg) + (if (condition? exn) + (begin + (pp (condition->list exn) *default-log-port*) + (pp dat *default-log-port*) + (debug:print 0 *default-log-port* msg + ", error: " ((condition-property-accessor 'exn 'message) exn) + ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) + ", location: " ((condition-property-accessor 'exn 'location) exn) + )) + (debug:print 0 *default-log-port* msg "(note: exn="exn", is not a condition object."))))) + (condition-case + (let-values (((inp oup)(tcp-connect host port))) + (let ((res (if (and inp oup) + (begin + (serialize dat oup) + (close-output-port oup) + (deserialize inp)) + ))) + (close-input-port inp) + (match res + ((result exn-result stdout-result) + (if exn-result + (full-err-print exn-result "ERROR: Server side exception detected")) + (if stdout-result + (debug:print 0 *default-log-port* "ERROR: Output detected on stdout on server side execution => "stdout-result)) + result) + (else + (debug:print 0 *default-log-port* "ERROR: server returned non-standard output: "res) + #f)))) + (exn (io-error) + (full-err-print exn "ERROR: i/o error") + (tt:backoff-incr host port) + #f) + (exn (i/o net) + (if ping-mode + #f + (cond + ((> tries-remaining 4) ;; server likely defunct + (tt:backoff-incr host port) + #f) + ((>= tries-remaining 0) + (let* ((backoff-delay (max (* (- 26 tries-remaining) 0.1) 1.0))) + (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.") + (thread-sleep! backoff-delay) + (tt:backoff-incr host port) + (retry)) + ;; (assert #f "FATAL: Too many retries in tt:send-receive-direct") + ) + (else #f)))) + (exn () + (full-err-print exn "Unhandled exception from client side.") + #f)))) + + +;;====================================================================== +;; server +;;====================================================================== + +(define (tt:sync-dbs ttdat) + #f) + +;; start the listener and start responding to requests +;; +;; NOTE: organise by dbfname, not run-id so we don't need +;; to pull in more modules +;; +;; This is the routine called in megatest.scm to start a server +;; +;; Server viability is checked in keep-running. Blindly start and run here. +;; +(define (tt:start-server areapath run-id dbfname-in handler keys) + (assert areapath "FATAL: areapath not provided for tt:start-server") + ;; is there already a server for this dbfile? Then exit. + (let* ((ttdat (make-tt areapath: areapath)) + (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) + (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead + (if (> (length servers) 4) + (begin + (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") + (exit)) + (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys))) + (tt-handler-set! ttdat (handler dbstruct)) + (let* ((tcp-thread (make-thread + (lambda () + (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data + "tcp-server-thread")) + (run-thread (make-thread + (lambda () + (tt:keep-running ttdat dbfname dbstruct))))) + (thread-start! tcp-thread) + (thread-start! run-thread) + (thread-join! run-thread) ;; run thread will exit on timeout or other conditions + (exit)))))) + +(define (tt:keep-running ttdat dbfname dbstruct) + ;; verfiy conn for ready + ;; listener socket has been started by this stage + ;; wait for a port before creating the registration file + ;; + (let* ((db-locked-in #f) + (areapath (tt-areapath ttdat)) + (nosyncdbpath (conc areapath"/.mtdb")) + (cleanup (lambda () + (if (tt-cleanup-proc ttdat) + ((tt-cleanup-proc ttdat))) + (dbfile:with-no-sync-db nosyncdbpath + (lambda (db) + (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct))) + (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname) + (db:no-sync-del! db dbfname) + #;(if dbtmpname + (delete-file dbtmpname)))))))) + (set! *server-info* ttdat) + (let loop ((count 0)) + (if (> count 240) + (begin + (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") + (exit 1)) + (if (not (tt-port ttdat)) ;; no connection yet + (begin + (thread-sleep! 0.25) + (loop (+ count 1)))))) + + (tt:create-server-registration-file ttdat dbfname) + ;; now start watching the last-access, if it hasn't been touched + ;; in over ten seconds we exit + (thread-sleep! 0.05) ;; any real need for delay here? + (let loop () + (let* ((servers (tt:get-server-info-sorted ttdat dbfname)) + (ok (cond + ((null? servers) #f) ;; not ok + ((equal? (list-ref (car servers) 6) ;; compare the servinfofile + (tt-servinf-file ttdat)) + (let* ((res (if db-locked-in + #t + (let* ((lock-result ;; this is the primary lock - need to double verify that got it + (dbfile:with-no-sync-db + nosyncdbpath + (lambda (db) + (db:no-sync-lock-and-check db dbfname + (tt-servinf-file ttdat) + ;; (dbr:dbstruct-dbtmpname dbstruct) + )))) + (success (car lock-result))) + (if success + (begin + (tt-state-set! ttdat 'running) + (debug:print 0 *default-log-port* "Got server lock for " dbfname) + (set! db-locked-in #t) + #t) + (begin + (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) + #f)))))) + (if (and res (common:low-noise-print 120 "top server message")) + (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for " + dbfname" on "(tt-host ttdat)":"(tt-port ttdat))) + res)) + (else + (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) + (let* ((leadsrv (car servers))) + (match leadsrv + ((host port startseconds server-id pid dbfname servinfofile) + (let* ((result (tt:timed-ping host port server-id)) + (res (car result)) + (ping (cdr result))) + (debug:print-info 0 *default-log-port* "Ping to "host":"port", with server-id "server-id + ", and file "servinfofile" returned "res) + (if res + #f ;; not the server, but all good, want to exit + (if (and (file-exists? servinfofile) + (> (- (current-seconds)(file-modification-time servinfofile)) 30)) + (begin + ;; can't ping and file has been on disk 15 seconds, go ahead and try to remove it + (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile) + (handle-exceptions + exn + (debug:print-info 0 *default-log-port* "Error removing server info file: "servinfofile) + (delete-file* servinfofile) + ) + #t) ;; not the server but the server is not reachable + (begin + (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", will try again.") + (thread-sleep! 1) ;; just because + #t))))) + (else ;; should never get here + (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv) + (assert #f "Bad server record "leadsrv)))))))) + (if ok + (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access + (begin + (debug:print 0 *default-log-port* "Exiting immediately") + (cleanup) + (exit))) + + (let* ((last-update (dbr:dbstruct-last-update dbstruct)) + (curr-secs (current-seconds))) + (if (and (eq? (tt-state ttdat) 'running) + (> (- curr-secs last-update) 3)) ;; every 3-4 seconds update the db? + (begin + (set! (file-modification-time (tt-servinf-file ttdat)) (current-seconds)) + ((dbr:dbstruct-sync-proc dbstruct) last-update) + (dbr:dbstruct-last-update-set! dbstruct curr-secs)))) + + (if (< (- (current-seconds) (tt-last-access ttdat)) (tt-server-timeout-param)) + (begin + (thread-sleep! 5) + (loop))))) + (cleanup) + (debug:print 0 *default-log-port* "INFO: Server timed out, exiting."))) + + +;; ;; given an already set up uconn start the cmd-loop +;; ;; +;; (define (tt:cmd-loop ttdat) +;; (let* ((serv-listener (-socket uconn)) +;; (listener (lambda () +;; (let loop ((state 'start)) +;; (let-values (((inp oup)(tcp-accept serv-listener))) +;; ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params) +;; (resp (ulex-handler uconn rdat))) +;; (serialize resp oup) +;; (close-input-port inp) +;; (close-output-port oup) +;; ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP +;; ) +;; (loop state)))))) +;; ;; start N of them +;; (let loop ((thnum 0) +;; (threads '())) +;; (if (< thnum 100) +;; (let* ((th (make-thread listener (conc "listener" thnum)))) +;; (thread-start! th) +;; (loop (+ thnum 1) +;; (cons th threads))) +;; (map thread-join! threads))))) +;; +;; +;; +;; (define (wait-and-close uconn) +;; (thread-join! (udat-cmd-thread uconn)) +;; (tcp-close (udat-socket uconn))) +;; +;; + +(define (tt:shutdown-server ttdat) + (let* ((cleanproc (tt-cleanup-proc ttdat)) + (port (tt-port ttdat))) + (tt-state-set! ttdat 'shutdown) + (portlogger:open-run-close portlogger:set-port port "released") + (if cleanproc (cleanproc)) + (tcp-close (tt-socket ttdat)) ;; close up ports here + )) + +;; (define (wait-and-close uconn) +;; (thread-join! (tt-cmd-thread uconn)) +;; (tcp-close (tt-socket uconn))) + +;; return servid +;; side-effects: +;; ttdat-cleanup-proc is populated with function to remove the serverinfo file +(define (tt:create-server-registration-file ttdat dbfname) + (let* ((areapath (tt-areapath ttdat)) + (servdir (tt:get-servinfo-dir areapath)) + (host (tt-host ttdat)) + (port (tt-port ttdat)) + (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) + (serv-id (tt:mk-signature areapath)) + (clean-proc (lambda () + (delete-file* servinf) + ))) + (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) + (tt-cleanup-proc-set! ttdat clean-proc) + (tt-servinf-file-set! ttdat servinf) + (with-output-to-file servinf + (lambda () + (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) + serv-id)) + +;; find valid server +;; get servers listed, last part of name must match : +;; if more than one, wait one second and look again +;; future: ping oldest, if alive remove other : files +;; +(define (tt:find-server areapath dbfname) + (let* ((servdir (tt:get-servinfo-dir areapath)) + (sfiles (glob (conc servdir"/*:"dbfname)))) + sfiles)) + +;; given a path to a server info file return: host port startseconds server-id pid dbfname logf +;; example of what it's looking for in the log file: +;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 +;; +(define (tt:server-get-info logf) + (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id + (dbprep-rx (regexp "^SERVER: dbprep")) + (dbprep-found 0) + (bad-dat (list #f #f #f #f #f #f logf))) + (let ((fdat (handle-exceptions + exn + (begin + ;; WARNING: this is potentially dangerous to blanket ignore the errors + (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn="(condition->list exn)) + '()) ;; no idea what went wrong, call it a bad server, return empty list + (with-input-from-file logf read-lines)))) + (if (null? fdat) ;; bad data, return bad-dat + bad-dat + (let loop ((inl (car fdat)) + (tail (cdr fdat)) + (lnum 0)) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl))) + (if dbprep (set! dbprep-found 1)) + (if (not mlst) + (if (> lnum 500) ;; give up if more than 500 lines of server log read + bad-dat + (if (null? tail) + bad-dat + (loop (car tail)(cdr tail)(+ lnum 1)))) + (match mlst ;; have a not null list + ((_ host port start server-id pid dbfname) + (list host + (string->number port) + (string->number start) + server-id + (string->number pid) + dbfname + logf)) + (else + (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst) + bad-dat))))))))) + +;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; if the target-host is set +;; try running on that host +;; incidental: rotate logs in logs/ dir. +;; +(define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area + (assert areapath "FATAL: tt:server-process-run called without areapath defined.") + (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.") + (assert mtexe "FATAL: tt:server-process-run called without mtexe defined.") + ;; mtest -server - -m testsuite:ext-tests -db 6.db + (let* ((dbfname (dbmod:run-id->dbfname run-id)) + (load (get-normalized-cpu-load)) + (trying (length (tt:find-server areapath dbfname))) + (nrun (number-of-processes-running (conc "mtest.*server.*"testsuite".*"dbfname)))) + (cond + ((> load 2.0) + (debug:print 0 *default-log-port* "Normalized load "load" on " (get-host-name) " is over the limit of 2.0. Not starting a server.") + (thread-sleep! 1)) + ((> nrun 100) + (debug:print 0 *default-log-port* nrun" servers running on " (get-host-name) ", not starting another.") + (thread-sleep! 1)) + ((> trying 4) + (debug:print 0 *default-log-port* trying" servers registered in .servinfo dir. not starting another.") + (thread-sleep! 1)) + (else + (if (not (file-exists? (conc areapath"/logs"))) + (create-directory (conc areapath"/logs") #t)) + (let* ((logfile (conc areapath "/logs/server-"dbfname"-"(current-process-id)".log")) ;; -" curr-pid "-" target-host ".log")) + (cmdln (conc + mtexe + " -server - ";; (or target-host "-") + " -m testsuite:" testsuite + ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this + " -db " dbfname ;; (dbmod:run-id->dbfname run-id) + " " profile-mode + ))) ;; (conc " >> " logfile " 2>&1 &"))))) + ;; we want the remote server to start in *toppath* so push there + ;; (push-directory areapath) ;; use cd in the command line instead + (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath) + ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) + (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... + (setenv "NBFAKE_LOG" logfile) + (system (conc "cd "areapath" ; nbfake " cmdln)) + (unsetenv "NBFAKE_QUIET") + (unsetenv "NBFAKE_LOG") + ;;(pop-directory) + ))))) + +;;====================================================================== +;; tcp connection stuff +;;====================================================================== + +;; find a port and start tcp-server. This only starts the tcp portion of +;; the server, look at (tt:start-server ...) above for the entry point +;; for the entire server system +;; +(define (tt:start-tcp-server ttdat) + (setup-listener-portlogger ttdat) ;; set up tcp-listener + (let* ((socket (tt-socket ttdat)) + (handler (tt-handler ttdat)) ;; the handler comes from our client setting a handler function + (handler-proc (lambda () + (let* ((indat (deserialize)) ;; could use: (thread-terminate! (current-thread)) + (result #f) + (exn-result #f) + (stdout-result (with-output-to-string + (lambda () + (let ((res (handle-exceptions + exn + (let* ((errdat (condition->list exn))) + (set! exn-result errdat) + (debug:print 0 *default-log-port* "ERROR: handler exception, these are bad, will exit in five seconds.") + (pp errdat *default-log-port*) + ;; these are always bad, set up an exit thread + (thread-start! (make-thread (lambda () + (thread-sleep! 5) + (exit)))) + #f) + (handler indat) ;; this is the proc being called by the remote client + ))) + (set! result res))))) + (full-result (list result exn-result (if (equal? stdout-result "") #f stdout-result)))) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Serialization failure. full-result="full-result) + (thread-start! (make-thread (lambda () + (thread-sleep! 5) + (exit))))) ;; (serialize '(#f #f #f)) ;; doesn't work - the first call to serialize caused failure + (serialize full-result)))))) + ((make-tcp-server socket handler-proc) + #f ;; yes, send error messages to std-err + ))) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +;; (define (setup-listener uconn #!optional (port 4242)) +;; (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) +;; (handle-exceptions +;; exn +;; (if (< port 65535) +;; (begin +;; (thread-sleep! 0.25) +;; (setup-listener uconn (+ port 1))) +;; #f) +;; (connect-listener uconn port))) + +(define (setup-listener-portlogger uconn) + (let ((port (portlogger:open-run-close portlogger:find-port))) + (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) + (handle-exceptions + exn + (if (< port 65535) + (begin + (portlogger:open-run-close portlogger:set-failed port) + (thread-sleep! 0.25) + (setup-listener-portlogger uconn)) + #f) + (connect-listener uconn port)))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 10000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (tt-port-set! uconn port) + (tt-host-set! uconn addr) + (tt-host-port-set! uconn (conc addr":"port)) + (tt-socket-set! uconn tlsn) + uconn)) + +;;====================================================================== +;; utils +;;====================================================================== + +;; Generate a unique signature for this server +(define (tt:mk-signature areapath) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list areapath + (current-process-id) + (argv))))))) + + +(define (tt:get-best-guess-address hostname) + (let ((res #f)) + (for-each + (lambda (adr) + (if (not (eq? (u8vector-ref adr 0) 127)) + (set! res adr))) + ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME + (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) + (string-intersperse + (map number->string + (u8vector->list + (if res res (hostname->ip hostname)))) "."))) + +(define (tt:get-servinfo-dir areapath) + (let* ((spath (conc areapath"/.servinfo"))) + (if (not (file-exists? spath)) + (create-directory spath #t)) + spath)) + +;;====================================================================== +;; network utilities +;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + +(define (get-my-best-address) + (let ((all-my-addresses (get-all-ips))) + (cond + ((null? all-my-addresses) + (get-host-name)) ;; no interfaces? + ((eq? (length all-my-addresses) 1) + (car all-my-addresses)) ;; only one to choose from, just go with it + (else + (car (sort all-my-addresses ip-pref-less?)))))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips) + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +) Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -20,22 +20,30 @@ ;;====================================================================== ;; Database access ;;====================================================================== +(declare (unit tdb)) +(declare (uses debugprint)) +(declare (uses common)) +(declare (uses keys)) +(declare (uses ods)) +(declare (uses mt)) +(declare (uses db)) +(declare (uses commonmod)) +(declare (uses mtargs)) +(declare (uses rmtmod)) + (require-extension (srfi 18) extras tcp) (use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) -(declare (unit tdb)) -(declare (uses common)) -(declare (uses keys)) -(declare (uses ods)) -(declare (uses client)) -(declare (uses mt)) -(declare (uses db)) +(import commonmod + debugprint + rmtmod + (prefix mtargs args:)) (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -21,25 +21,24 @@ ;;====================================================================== ;; Tests ;;====================================================================== (declare (unit tests)) -(declare (uses lock-queue)) (declare (uses db)) (declare (uses tdb)) +(declare (uses debugprint)) (declare (uses common)) (declare (uses commonmod)) -;; (declare (uses dcommon)) ;; needed for the steps processing (declare (uses items)) (declare (uses runconfig)) -;; (declare (uses sdb)) (declare (uses server)) -;;(declare (uses stml2)) +(declare (uses mtargs)) +(declare (uses rmtmod)) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking directory-utils) (import (prefix sqlite3 sqlite3:)) -(import commonmod) +(import commonmod (prefix mtargs args:) debugprint rmtmod) (require-library stml) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") @@ -1966,13 +1965,13 @@ ;;====================================================================== ;; teststep-set-status! used to be here (define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) - (let* ((testdat (rmt:get-test-info-by-id run-id test-id))) + (let* ((testdat (rmt:get-test-state-status-by-id run-id test-id))) (and testdat - (equal? (test:get-state testdat) "KILLREQ")))) + (equal? (car testdat) "KILLREQ")))) (define (test:tdb-get-rundat-count tdb) (if tdb (let ((res 0)) (sqlite3:for-each-row ADDED transport-mode.scm.template Index: transport-mode.scm.template ================================================================== --- /dev/null +++ transport-mode.scm.template @@ -0,0 +1,22 @@ +;;====================================================================== +;; set up transport, db cache and sync methods +;; +;; sync-method: 'original, 'attach or 'none +;; cache-method: 'tmp 'none +;; rmt:transport-mode: 'http, 'tcp, 'nfs +;; +;; NOTE: NOT ALL COMBINATIONS WORK +;; +;;====================================================================== + +;; uncomment this block to test without tcp +;; (dbfile:sync-method 'none) +;; (dbfile:cache-method 'none) +;; (rmt:transport-mode 'nfs) + +;; uncomment this block to test with tcp +(dbfile:sync-method 'original) ;; attach) ;; original +(dbfile:cache-method 'tmp) +(rmt:transport-mode 'tcp) + + Index: tree.scm ================================================================== --- tree.scm +++ tree.scm @@ -16,27 +16,29 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== +(declare (unit tree)) +(declare (uses mtargs)) +(declare (uses debugprint)) +(declare (uses launch)) +(declare (uses gutils)) +(declare (uses db)) +(declare (uses server)) +(declare (uses dcommon)) + (use format) (require-library iup) (import (prefix iup iup:)) (use canvas-draw) (use sqlite3 srfi-1 posix regex regex-case srfi-69) (import (prefix sqlite3 sqlite3:)) -(declare (unit tree)) -(declare (uses margs)) -(declare (uses launch)) -;; (declare (uses megatest-version)) -(declare (uses gutils)) -(declare (uses db)) -(declare (uses server)) -;; (declare (uses synchash)) -(declare (uses dcommon)) +(import (prefix mtargs args:) + debugprint) (include "megatest-version.scm") (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") DELETED ulex.scm Index: ulex.scm ================================================================== --- ulex.scm +++ /dev/null @@ -1,24 +0,0 @@ -;;====================================================================== -;; Copyright 2019, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - -;;====================================================================== - -(declare (unit ulex)) -(declare (uses pkts)) - -(include "ulex/ulex.scm") ADDED ulex/dbmgr.scm Index: ulex/dbmgr.scm ================================================================== --- /dev/null +++ ulex/dbmgr.scm @@ -0,0 +1,1131 @@ +;;====================================================================== +;; Copyright 2022, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbmgrmod)) + +(declare (uses ulex)) +(declare (uses apimod)) +(declare (uses pkts)) +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses portloggermod)) +(declare (uses debugprint)) + +(module dbmgrmod + * + +(import scheme + chicken.base + chicken.condition + chicken.file + chicken.format + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + + (prefix sqlite3 sqlite3:) + matchable + md5 + message-digest + regex + s11n + srfi-1 + srfi-18 + srfi-69 + system-information + typed-records + + pkts + ulex + + commonmod + apimod + dbmod + debugprint + (prefix mtargs args:) + portloggermod + ) + +;; Configurations for server +;; (tcp-buffer-size 2048) +;; (max-connections 2048) + +;; info about me as a listener and my connections to db servers +;; stored (for now) in *db-serv-info* +;; +(defstruct servdat + (host #f) + (port #f) + (uuid #f) + (dbfile #f) + (uconn #f) ;; this is the listener *FOR THIS PROCESS* + (mode #f) + (status 'starting) + (trynum 0) ;; count the number of ports we've tried + (conns (make-hash-table)) ;; apath/dbname => conndat + ) + +(define *db-serv-info* (make-servdat)) + +(define (servdat->url sdat) + (conc (servdat-host sdat)":"(servdat-port sdat))) + +;; db servers contact info +;; +(defstruct conndat + (apath #f) + (dbname #f) + (fullname #f) + (hostport #f) + (ipaddr #f) + (port #f) + (srvpkt #f) + (srvkey #f) + (lastmsg 0) + (expires 0)) + +(define *srvpktspec* + `((server (host . h) + (port . p) + (servkey . k) + (pid . i) + (ipaddr . a) + (dbpath . d)))) + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +;; set up the api proc, seems like there should be a better place for this? +;; +;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE +;; +;; (define api-proc (make-parameter conc)) +;; (api-proc api:execute-requests) + +;; do we have a connection to apath dbname and +;; is it not expired? then return it +;; +;; else setup a connection +;; +;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception +;; +(define (rmt:get-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-ref/default (servdat-conns remdat) fullname #f))) + +(define (rmt:drop-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname))) + (hash-table-delete! (servdat-conns remdat) fullname))) + +(define (rmt:find-main-server uconn apath dbname) + (let* ((pktsdir (get-pkts-dir apath)) + (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) + (viable-srvs (get-viable-servers all-srvpkts dbname))) + (get-the-server uconn apath viable-srvs))) + + +(define *connstart-mutex* (make-mutex)) +(define *last-main-start* 0) + +;; looks for a connection to main, returns if have and not exired +;; creates new otherwise +;; +;; connections for other servers happens by requesting from main +;; +;; TODO: This is unnecessarily re-creating the record in the hash table +;; +(define (rmt:open-main-connection remdat apath) + (let* ((fullpath (db:dbname->path apath ".db/main.db")) + (conns (servdat-conns remdat)) + (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this + (start-rmt:run (lambda () + (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server"))) + (thread-start! th1) + (thread-sleep! 1) + (let loop ((count 0)) + (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection") + (if (or (not *db-serv-info*) + (not (servdat-uconn *db-serv-info*))) + (begin + (thread-sleep! 1) + (loop (+ count 1))) + (begin + (servdat-mode-set! *db-serv-info* 'non-db) + (servdat-uconn *db-serv-info*))))))) + (myconn (servdat-uconn *db-serv-info*))) + (cond + ((not myconn) + (start-rmt:run) + (rmt:open-main-connection remdat apath)) + ((and conn ;; conn is NOT a socket, just saying ... + (< (current-seconds) (conndat-expires conn))) + #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died + ((and conn + (>= (current-seconds)(conndat-expires conn))) + (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") + (rmt:drop-conn remdat apath ".db/main.db") ;; + (rmt:open-main-connection remdat apath)) + (else + ;; Below we will find or create and connect to main + (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch") + (let* ((dbname (db:run-id->dbname #f)) + (the-srv (rmt:find-main-server myconn apath dbname)) + (start-main-srv (lambda () ;; call IF there is no the-srv found + (mutex-lock! *connstart-mutex*) + (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server + (begin + (api:run-server-process apath dbname) + (set! *last-main-start* (current-seconds)) + (thread-sleep! 1)) + (thread-sleep! 0.25)) + (mutex-unlock! *connstart-mutex*) + (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries + ))) + (if (not the-srv) ;; have server, try connecting to it + (start-main-srv) + (let* ((srv-addr (server-address the-srv)) ;; need serv + (ipaddr (alist-ref 'ipaddr the-srv)) + (port (alist-ref 'port the-srv)) + (srvkey (alist-ref 'servkey the-srv)) + (fullpath (db:dbname->path apath dbname)) + + (new-the-srv (make-conndat + apath: apath + dbname: dbname + fullname: fullpath + hostport: srv-addr + ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvpkt: the-srv + srvkey: srvkey ;; generated by rmt:get-signature on the server side + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2) ;; this needs to be gathered during the ping + ))) + (hash-table-set! conns fullpath new-the-srv))) + #t))))) + +;; NB// sinfo is a servdat struct +;; +(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5)) + (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") + (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable + (fullname (db:dbname->path apath dbname)) + (conns (servdat-conns sinfo)) + (mconn (rmt:get-conn sinfo apath ".db/main.db")) + (dconn (rmt:get-conn sinfo apath dbname))) + #;(if (and mconn + (not (debug:print-logger))) + (begin + (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") + (debug:print-logger rmt:log-to-main))) + (cond + ((and mconn + dconn + (< (current-seconds)(conndat-expires dconn))) + #t) ;; good to go + ((not mconn) ;; no channel open to main? open it... + (rmt:open-main-connection sinfo apath) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + ((not dconn) ;; no channel open to dbname? + (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname)))) + (case res + ((server-started) + (if (> num-tries 0) + (begin + (thread-sleep! 2) + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + (begin + (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) + (exit 1)))) + (else + (if (list? res) ;; server has been registered and the info was returned. pass it on. + (begin ;; ("192.168.0.9" 53817 + ;; "5e34239f48e8973b3813221e54701a01" "24310" + ;; "192.168.0.9" + ;; "/home/matt/data/megatest/tests/simplerun" + ;; ".db/1.db") + (match + res + ((host port servkey pid ipaddr apath dbname) + (debug:print-info 0 *default-log-port* "got "res) + (hash-table-set! conns + fullname + (make-conndat + apath: apath + dbname: dbname + hostport: (conc host":"port) + ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection? + ipaddr: ipaddr + port: port + srvkey: servkey + lastmsg: (current-seconds) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2)))) + (else + (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) + res) + (begin + (debug:print-info 0 *default-log-port* "Unexpected result: " res) + res))))))) + #t)) + +;;====================================================================== + +;; FOR DEBUGGING SET TO #t +;; (define *localmode* #t) +(define *localmode* #f) +(define *dbstruct* (make-dbr:dbstruct)) + +;; Defaults to current area +;; +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) + (let* ((apath *toppath*) + (sinfo *db-serv-info*) + (dbname (db:run-id->dbname rid))) + (if *localmode* + (api:execute-requests *dbstruct* cmd params) + (begin + (rmt:open-main-connection sinfo apath) + (if rid (rmt:general-open-connection sinfo apath dbname)) + #;(if (not (member cmd '(log-to-main))) + (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) + (rmt:send-receive-real sinfo apath dbname cmd params))))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future +;; +(define (rmt:send-receive-real sinfo apath dbname cmd params) + (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.") + (let* ((cdat (rmt:get-conn sinfo apath dbname))) + (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") + (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex + ;; then send-receive using the ulex layer to host-port stored in cdat + (res (send-receive uconn (conndat-hostport cdat) cmd params)) + #;(th1 (make-thread (lambda () + (set! res (send-receive uconn (conndat-hostport cdat) cmd params))) + "send-receive thread"))) + ;; (thread-start! th1) + ;; (thread-join! th1) ;; gratuitious thread stuff is so that mailbox is not used in primordial thead + ;; since we accessed the server we can bump the expires time up + (conndat-expires-set! cdat (+ (current-seconds) + (server:expiration-timeout) + -10)) ;; ten second margin for network time misalignments etc. + res))) + +;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed +;; sometime in the future. +;; +;; Purpose - call the main.db server and request a server be started +;; for the given area path and dbname +;; + +(define (rmt:print-db-stats) + (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" + (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================") + (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) + (for-each (lambda (cmd) + (let ((cmd-dat (hash-table-ref *db-stats* cmd))) + (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) + (sort (hash-table-keys *db-stats*) + (lambda (a b) + (> (vector-ref (hash-table-ref *db-stats* a) 0) + (vector-ref (hash-table-ref *db-stats* b) 0))))))) + +(define (rmt:get-max-query-average run-id) + (mutex-lock! *db-stats-mutex*) + (let* ((runkey (conc "run-id=" run-id " ")) + (cmds (filter (lambda (x) + (substring-index runkey x)) + (hash-table-keys *db-stats*))) + (res (if (null? cmds) + (cons 'none 0) + (let loop ((cmd (car cmds)) + (tal (cdr cmds)) + (max-cmd (car cmds)) + (res 0)) + (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) + (tot (vector-ref cmd-dat 0)) + (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction + (currmax (max res curravg)) + (newmax-cmd (if (> curravg res) cmd max-cmd))) + (if (null? tal) + (if (> tot 10) + (cons newmax-cmd currmax) + (cons 'none 0)) + (loop (car tal)(cdr tal) newmax-cmd currmax))))))) + (mutex-unlock! *db-stats-mutex*) + res)) + +;; host and port are used to ensure we are remove proper records +(define (rmt:server-shutdown host port) + (let ((dbfile (servdat-dbfile *db-serv-info*))) + (debug:print-info 0 *default-log-port* "dbfile is "dbfile) + (if dbfile + (let* ((am-server (args:get-arg "-server")) + (dbfile (args:get-arg "-db")) + (apath *toppath*) + #;(sinfo *remotedat*)) ;; foundation for future fix + (if *dbstruct-db* + (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) + (db (dbr:dbdat-db dbdat)) + (inmem (dbr:dbdat-db dbdat)) ;; WRONG + ) + ;; do a final sync here + (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) + (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) + ;; let's finalize here + (debug:print-info 0 *default-log-port* "Finalizing db and inmem") + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) + (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) + (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") + (if (string-match ".*/main.db$" dbfile) + (let ((pkt-file (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) + (delete-file* pkt-file) + (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) + (db:with-lock-db + (servdat-dbfile *db-serv-info*) + (lambda (dbh dbfile) + (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove + (let* ((sdat *db-serv-info*) ;; we have a run-id server + (host (servdat-host sdat)) + (port (servdat-port sdat)) + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) + (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) + ))))))) + + +(define (common:run-sync?) + ;; (and (common:on-homehost?) + (args:get-arg "-server")) + +(define *rmt:run-mutex* (make-mutex)) +(define *rmt:run-flag* #f) + +;; Main entry point to start a server. was start-server +(define (rmt:run hostn) + (mutex-lock! *rmt:run-mutex*) + (if *rmt:run-flag* + (begin + (debug:print-warn 0 *default-log-port* "rmt:run already running.") + (mutex-unlock! *rmt:run-mutex*)) + (begin + (set! *rmt:run-flag* #t) + (mutex-unlock! *rmt:run-mutex*) + ;; ;; Configurations for server + ;; (tcp-buffer-size 2048) + ;; (max-connections 2048) + (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") + (if (and *db-serv-info* + (servdat-uconn *db-serv-info*)) + (let* ((uconn (servdat-uconn *db-serv-info*))) + (wait-and-close uconn)) + (let* ((port (portlogger:open-run-close portlogger:find-port)) + (handler-proc (lambda (rem-host-port qrykey cmd params) ;; + (set! *db-last-access* (current-seconds)) + (assert (list? params) "FATAL: handler called with non-list params") + (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params) + (debug:print 0 *default-log-port* "handler call: "cmd", params="params) + (api:execute-requests *dbstruct-db* cmd params)))) + ;; (api:process-request *dbstuct-db* + (if (not *db-serv-info*) + (set! *db-serv-info* (make-servdat host: hostn port: port))) + (let* ((uconn (run-listener handler-proc port)) + (rport (udat-port uconn))) ;; the real port + (servdat-host-set! *db-serv-info* hostn) + (servdat-port-set! *db-serv-info* rport) + (servdat-uconn-set! *db-serv-info* uconn) + (wait-and-close uconn) + (db:print-current-query-stats) + ))) + (let* ((host (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (mode (or (servdat-mode *db-serv-info*) + "non-db"))) + ;; server exit stuff here + ;; (rmt:server-shutdown host port) - always do in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit + (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting") + )))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + + +;;====================================================================== +;; NEW SERVER METHOD +;;====================================================================== + +;; only use for main.db - need to re-write some of this :( +;; +(define (get-lock-db sdat dbfile host port) + (assert host "FATAL: get-lock-db called with host not set.") + (assert port "FATAL: get-lock-db called with port not set.") + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile host port)) + (uconn (servdat-uconn sdat))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? uconn (conc owner_host":"owner_port) "abc") + #f ;; locked by someone else + (begin ;; locked by someone dead and gone + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) + (sqlite3:finalize! dbh) + res)) + + +(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) + (let* ((pkt-dat `((host . ,host) + (port . ,port) + (servkey . ,servkey) + (pid . ,(current-process-id)) + (ipaddr . ,ipaddr) + (dbpath . ,dbpath))) + (uuid (write-alist->pkt + pkts-dir + pkt-dat + pktspec: pkt-spec + ptype: 'server))) + (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) + uuid)) + +(define (get-pkts-dir #!optional (apath #f)) + (let* ((effective-toppath (or *toppath* apath))) + (assert effective-toppath + "ERROR: get-pkts-dir called without *toppath* set. Exiting.") + (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) + (if (file-exists? pdir) + pdir + (begin + (handle-exceptions ;; this exception handler should NOT be needed but ... + exn + pdir + (create-directory pdir #t)) + pdir))))) + +;; given a pkts dir read +;; +(define (get-all-server-pkts pktsdir-in pktspec) + (let* ((pktsdir (if (file-exists? pktsdir-in) + pktsdir-in + (begin + (create-directory pktsdir-in #t) + pktsdir-in))) + (all-pkt-files (glob (conc pktsdir "/*.pkt")))) + (map (lambda (pkt-file) + (read-pkt->alist pkt-file pktspec: pktspec)) + all-pkt-files))) + +(define (server-address srv-pkt) + (conc (alist-ref 'host srv-pkt) ":" + (alist-ref 'port srv-pkt))) + +(define (server-ready? uconn host-port key) ;; server-address is host:port + (let* ((params `((cmd . ping)(key . ,key))) + (data `((cmd . ping) + (key . ,key) + (params . ,params))) ;; I don't get it. + (res (send-receive uconn host-port 'ping data))) + (if (eq? res 'ack) ;; yep, likely it is who we want on the other end + res + #f))) +;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f)))) + +; from the pkts return servers associated with dbpath +;; NOTE: Only one can be alive - have to check on each +;; in the list of pkts returned +;; +(define (get-viable-servers serv-pkts dbpath) + (let loop ((tail serv-pkts) + (res '())) + (if (null? tail) + res ;; NOTE: sort by age so oldest is considered first + (let* ((spkt (car tail))) + (loop (cdr tail) + (if (equal? dbpath (alist-ref 'dbpath spkt)) + (cons spkt res) + res)))))) + +(define (remove-pkts-if-not-alive uconn serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (host-port (conc host":"port)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (server-ready? uconn host-port key))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) + +;; from viable servers get one that is alive and ready +;; +(define (get-the-server uconn apath serv-pkts) + (let loop ((tail serv-pkts)) + (if (null? tail) + #f + (let* ((spkt (car tail)) + (host (alist-ref 'ipaddr spkt)) + (port (alist-ref 'port spkt)) + (host-port (conc host":"port)) + (dbpth (alist-ref 'dbpath spkt)) + (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt)) + (addr (server-address spkt))) + (if (server-ready? uconn host-port srvkey) + spkt + (loop (cdr tail))))))) + +;; am I the "first" in line server? I.e. my D card is smallest +;; use Z card as tie breaker +;; +(define (get-best-candidate serv-pkts dbpath) + (if (null? serv-pkts) + #f + (let loop ((tail serv-pkts) + (best (car serv-pkts))) + (if (null? tail) + best + (let* ((candidate (car tail)) + (candidate-bd (string->number (alist-ref 'D candidate))) + (best-bd (string->number (alist-ref 'D best))) + ;; bigger number is younger + (candidate-z (alist-ref 'Z candidate)) + (best-z (alist-ref 'Z best)) + (new-best (cond + ((> best-bd candidate-bd) ;; best is younger than candidate + candidate) + ((< best-bd candidate-bd) ;; candidate is younger than best + best) + (else + (if (string>=? best-z candidate-z) + best + candidate))))) ;; use Z card as tie breaker + (if (null? tail) + new-best + (loop (cdr tail) new-best))))))) + + +;;====================================================================== +;; END NEW SERVER METHOD +;;====================================================================== + +;; if .db/main.db check the pkts +;; +(define (rmt:wait-for-server pkts-dir db-file server-key) + (let* ((sdat *db-serv-info*)) + (let loop ((start-time (current-seconds)) + (changed #t) + (last-sdat "not this")) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *db-serv-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (> (- (current-seconds) start-time) 2)) + (let* ((uconn (servdat-uconn sdat))) + (servdat-status-set! sdat 'iface-stable) + (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server") + ;; create a server pkt in *toppath*/.meta/srvpkts + + ;; TODO: + ;; 1. change sdat to stuct + ;; 2. add uuid to struct + ;; 3. update uuid in sdat here + ;; + (servdat-uuid-set! sdat + (register-server + pkts-dir *srvpktspec* + (get-host-name) + (servdat-port sdat) server-key + (servdat-host sdat) db-file)) + ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key + ;; now read pkts and see if we are a contender + (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) + (viables (get-viable-servers all-pkts db-file)) + (alive (remove-pkts-if-not-alive uconn viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *db-serv-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) + ;; am I the best-srv, compare server-keys to know + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (begin + (debug:print-info 0 *default-log-port* "I'm the server!") + (servdat-dbfile-set! sdat db-file) + (servdat-status-set! sdat 'db-locked)) + (begin + (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + (begin + (debug:print-info 0 *default-log-port* + "Keys do not match "best-srv-key", "server-key", exiting.") + (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) + (thread-sleep! 0.2) + (exit))) + sdat)) + (begin ;; sdat not yet contains server info + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (sleep 4) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (exit)) + (loop start-time + (equal? sdat last-sdat) + sdat)))))))) + +(define (rmt:register-server sinfo apath iface port server-key dbname) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'register-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:get-count-servers sinfo apath) + (servdat-conns sinfo) ;; just checking types + (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db + (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) + +(define (rmt:deregister-server db-serv-info apath iface port server-key dbname) + (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db + (rmt:send-receive-real db-serv-info apath ;; params: host port servkey pid ipaddr dbpath + (db:run-id->dbname #f) + 'deregister-server `(,iface + ,port + ,server-key + ,(current-process-id) + ,iface + ,apath + ,dbname))) + +(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) + ;; wait until *db-serv-info* stops changing + (let* ((stime (current-seconds))) + (let loop ((last-host #f) + (last-port #f) + (tries 0)) + (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*))) + (curr-port (and *db-serv-info* (servdat-port *db-serv-info*)))) + ;; first we verify port and interface, update *db-serv-info* in need be. + (cond + ((> tries num-tries-allowed) + (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") + (exit 1)) + ((not *db-serv-info*) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not last-host)(not last-port)) + (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((or (not (equal? last-host curr-host)) + (not (equal? last-port curr-port))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (thread-sleep! 0.25) + (loop curr-host curr-port (+ tries 1))) + ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed + (thread-sleep! 0.5) + (loop curr-host curr-port (+ tries 1))) + (else + (rmt:get-signature) ;; sets *my-signature* as side effect + (servdat-status-set! *db-serv-info* 'interface-stable) + (debug:print 0 *default-log-port* + "SERVER STARTED: " curr-host + ":" curr-port + " AT " (current-seconds) " server signature: " *my-signature* + " with "(servdat-trynum *db-serv-info*)" port changes") + (flush-output *default-log-port*) + #t)))))) + +;; run rmt:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (rmt:keep-running dbname) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + + (let* ((sinfo *db-serv-info*) + (server-start-time (current-seconds)) + (pkts-dir (get-pkts-dir)) + (server-key (rmt:get-signature)) ;; This servers key + (is-main (equal? (args:get-arg "-db") ".db/main.db")) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (host port) + (set! *unclean-shutdown* #f) ;; Should not be needed anymore + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; (rmt:server-shutdown host port) -- called in on-exit + ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) + (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db")) + ;; main and run db servers have both got wait logic (could/should merge it) + (if is-main + (rmt:wait-for-server pkts-dir dbname server-key) + (rmt:wait-for-stable-interface)) + ;; this is our forever loop + (let* ((iface (servdat-host *db-serv-info*)) + (port (servdat-port *db-serv-info*)) + (uconn (servdat-uconn *db-serv-info*))) + (let loop ((count 0) + (bad-sync-count 0) + (start-time (current-milliseconds))) + (if (and (not is-main) + (common:low-noise-print 60 "servdat-status")) + (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*))) + + (mutex-lock! *heartbeat-mutex*) + ;; set up the database handle + (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate + (let ((watchdog (bdat-watchdog *bdat*))) + (debug:print 0 *default-log-port* "SERVER: dbprep") + (db:setup dbname) ;; sets *dbstruct-db* as side effect + (servdat-status-set! *db-serv-info* 'db-opened) + ;; IFF I'm not main, call into main and register self + (if (not is-main) + (let ((res (rmt:register-server sinfo + *toppath* iface port + server-key dbname))) + (if res ;; we are the server + (servdat-status-set! *db-serv-info* 'have-interface-and-db) + ;; now check that the db locker is alive, clear it out if not + (let* ((serv-info (rmt:server-info *toppath* dbname))) + (match serv-info + ((host port servkey pid ipaddr apath dbpath) + (if (not (server-ready? uconn (conc host":"port) servkey)) + (begin + (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") + (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) + (loop (+ count 1) bad-sync-count start-time)))) + (else + (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) + (exit))))))) + (debug:print 0 *default-log-port* + "SERVER: running, db "dbname" opened, megatest version: " + (common:get-full-version)) + ;; start the watchdog + + ;; is this really needed? + + #;(if watchdog + (if (not (member (thread-state watchdog) + '(ready running blocked + sleeping dead))) + (begin + (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")") + (thread-start! watchdog)) + (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")")) + (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it.")) + #;(loop (+ count 1) bad-sync-count start-time) + )) + + (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) + + (mutex-unlock! *heartbeat-mutex*) + + ;; when things go wrong we don't want to be doing the various + ;; queries too often so we strive to run this stuff only every + ;; four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (set! last-access *db-last-access*) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) bad-sync-count (current-milliseconds))) + + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence (get-host-name) port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port)) + ((and *server-run* + (or (not (timed-out?)) + (if is-main ;; do not exit if there are other servers (keep main open until all others gone) + (> (rmt:get-count-servers sinfo *toppath*) 1) + #f))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) + (loop 0 bad-sync-count (current-milliseconds))) + (else + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port) + #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " + (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown + (sexpr->string 'quit)))))))))) + +(define (rmt:get-reasonable-hostname) + (let* ((inhost (or (args:get-arg "-server") "-"))) + (if (equal? inhost "-") + (get-host-name) + inhost))) + +;; Call this to start the actual server +;; +;; all routes though here end in exit ... +;; +;; This is the point at which servers are started +;; +(define (rmt:server-launch dbname) + (debug:print-info 0 *default-log-port* "Entered rmt:server-launch") + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (rmt:run (rmt:get-reasonable-hostname))) + "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (if (args:get-arg "-server") + (rmt:keep-running dbname))) + "Keep running"))) + (thread-start! th2) + (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (thread-join! th3)) + #f) + +;;====================================================================== +;; S E R V E R - D I R E C T C A L L S +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server #f (list run-id))) + +(define (rmt:start-server run-id) + (rmt:send-receive 'start-server #f (list run-id))) + +(define (rmt:server-info apath dbname) + (rmt:send-receive 'get-server-info #f (list apath dbname))) + +;;====================================================================== +;; Nanomsg transport +;;====================================================================== + +#;(define (is-port-in-use port-num) + (let* ((ret #f)) + (let-values (((inp oup pid) + (process "netstat" (list "-tulpn" )))) + (let loop ((inl (read-line inp))) + (if (not (eof-object? inl)) + (begin + (if (string-search (regexp (conc ":" port-num)) inl) + (begin + ;(print "Output: " inl) + (set! ret #t)) + (loop (read-line inp))))))) + ret)) + +#;(define (open-nn-connection host-port) + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port))) + (nng-dial req uri) + (socket-set! req 'nng/recvtimeo 2000) + req)) + +#;(define (send-receive-nn req msg) + (nng-send req msg) + (nng-recv req)) + +#;(define (close-nn-connection req) + (nng-close! req)) + +;; ;; open connection to server, send message, close connection +;; ;; +;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds +;; (let ((req (make-req-socket 'req)) +;; (uri (conc "tcp://" host-port)) +;; (res #f) +;; ;; (contacts (alist-ref 'contact attrib)) +;; ;; (mode (alist-ref 'mode attrib)) +;; ) +;; (socket-set! req 'nng/recvtimeo 2000) +;; (handle-exceptions +;; exn +;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) +;; ;; Send notification +;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) +;; #f) +;; (nng-dial req uri) +;; ;; (print "Connected to the server " ) +;; (nng-send req msg) +;; ;; (print "Request Sent") +;; (let* ((th1 (make-thread (lambda () +;; (let ((resp (nng-recv req))) +;; (nng-close! req) +;; (set! res (if (equal? resp "ok") +;; #t +;; #f)))) +;; "recv thread")) +;; (th2 (make-thread (lambda () +;; (thread-sleep! timeout) +;; (thread-terminate! th1)) +;; "timer thread"))) +;; (thread-start! th1) +;; (thread-start! th2) +;; (thread-join! th1) +;; res)))) +;; +#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port)) + (res #f)) + (handle-exceptions + exn + (let ((emsg ((condition-property-accessor 'exn 'message) exn))) + ;; Send notification + (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn) + #f) + (nng-dial req uri) + (nng-send req msg) + (let* ((th1 (make-thread (lambda () + (let ((resp (nng-recv req))) + (nng-close! req) + ;; (print resp) + (set! res resp))) + "recv thread")) + (th2 (make-thread (lambda () + (thread-sleep! timeout) + (thread-terminate! th1)) + "timer thread"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + res)))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; run ping in separate process, safest way in some cases +;; +#;(define (server:ping-server ifaceport) + (with-input-from-pipe + (conc (common:get-megatest-exe) " -ping " ifaceport) + (lambda () + (let loop ((inl (read-line)) + (res "NOREPLY")) + (if (eof-object? inl) + (case (string->symbol res) + ((NOREPLY) #f) + ((LOGIN_OK) #t) + (else #f)) + (loop (read-line) inl)))))) + +;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; +#;(define (server:login toppath) + (lambda (toppath) + (set! *db-last-access* (current-seconds)) ;; might not be needed. + (if (equal? *toppath* toppath) + #t + #f))) + +;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; (define (server:release-sync-lock) +;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; (define (server:have-sync-lock?) +;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; (have-lock? (car have-lock-pair)) +;; (lock-time (cdr have-lock-pair)) +;; (lock-age (- (current-seconds) lock-time))) +;; (cond +;; (have-lock? #t) +;; ((>lock-age +;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; (server:release-sync-lock) +;; (server:have-sync-lock?)) +;; (else #f)))) + +) Index: ulex/ulex.scm ================================================================== --- ulex/ulex.scm +++ ulex/ulex.scm @@ -1,8 +1,8 @@ ;; ulex: Distributed sqlite3 db ;;; -;; Copyright (C) 2018 Matt Welland +;; Copyright (C) 2018-2021 Matt Welland ;; Redistribution and use in source and binary forms, with or without ;; modification, is permitted. ;; ;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS ;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED @@ -23,330 +23,521 @@ ;; NOTES: ;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. ;; ;;====================================================================== -(use mailbox) - -(module ulex - * - -(import scheme posix chicken data-structures ports extras files mailbox) -(import srfi-18 pkts matchable regex - typed-records srfi-69 srfi-1 - srfi-4 regex-case - (prefix sqlite3 sqlite3:) - foreign - tcp6 - ;; ulex-netutil - hostinfo - ) - -;; make it a global? Well, it is local to area module - -(define *captain-pktspec* - `((captain (host . h) - (port . p) - (pid . i) - (ipaddr . a) - ) - #;(data (hostname . h) ;; sender hostname - (port . p) ;; sender port - (ipaddr . a) ;; sender ip - (hostkey . k) ;; sending host key - store info at server under this key - (servkey . s) ;; server key - this needs to match at server end or reject the msg - (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json - (data . d) ;; base64 encoded slln data - ))) - -;; struct for keeping track of our world - -(defstruct udat - ;; captain info - (captain-address #f) - (captain-host #f) - (captain-port #f) - (captain-pid #f) - (captain-lease 0) ;; time (unix epoc) seconds when the lease is up - (ulex-dir (conc (get-environment-variable "HOME") "/.ulex")) - (cpkts-dir (conc (get-environment-variable "HOME") "/.ulex/pkts")) - (cpkt-spec *captain-pktspec*) - ;; this processes info - (my-cpkt-key #f) ;; put Z card here when I create a pkt for myself as captain - (my-address #f) - (my-hostname #f) - (my-port #f) - (my-pid (current-process-id)) - (my-dbs '()) - ;; server and handler thread - (serv-listener #f) ;; this processes server info - (handler-thread #f) - (mboxes (make-hash-table)) ;; key => mbox - ;; other servers - (peers (make-hash-table)) ;; host-port => peer record - (dbowners (make-hash-table)) ;; dbfile => host-port - (handlers (make-hash-table)) ;; dbfile => proc - ;; (outgoing-conns (make-hash-table)) ;; host:port -> conn - (work-queue (make-queue)) ;; most stuff goes here - ;; (fast-queue (make-queue)) ;; super quick stuff goes here (e.g. ping) - (busy #f) ;; is either of the queues busy, use to switch between queuing tasks or doing immediately - ;; app info - (appname #f) - (dbtypes (make-hash-table)) ;; this should be an alist but hash is easier. dbtype => [ initproc syncproc ] - ;; cookies - (cnum 0) ;; cookie num - ) - -;;====================================================================== -;; NEW APPROACH -;;====================================================================== - -;; start-server-find-port ;; gotta have a server port ready from the very begining - -;; udata - all the connection info, captain, server, ulex db etc. MUST BE PASSED IN -;; dbpath - full path and filename of the db to talk to or a symbol naming the db? -;; callname - the remote call to execute -;; params - parameters to pass to the remote call -;; -(define (remote-call udata dbpath dbtype callname . params) - (start-server-find-port udata) ;; ensure we have a local server - (find-or-setup-captain udata) - ;; look at connect, process-request, send, send-receive - (let-values (((cookie-key host-port)(get-db-owner udata dbpath dbtype))) - (send-receive udata host-port callname cookie-key params))) - -;;====================================================================== -;; KEY FUNCTIONS - THESE ARE TOO BE EXPOSED AND USED -;;====================================================================== - -;; connection setup and management functions - -;; This is the basic setup command. Must always be -;; called before connecting to a db using connect. -;; -;; find or become the captain -;; setup and return a ulex object -;; -(define (find-or-setup-captain udata) - ;; see if we already have a captain and if the lease is ok - (if (and (udat-captain-address udata) - (udat-captain-port udata) - (< (current-seconds) (udat-captain-lease udata))) - udata - (let* ((cpkts (get-all-captain-pkts udata)) ;; read captain pkts - (captn (get-winning-pkt cpkts))) - (if captn - (let* ((port (alist-ref 'port captn)) - (host (alist-ref 'host captn)) - (ipaddr (alist-ref 'ipaddr captn)) - (pid (alist-ref 'pid captn)) - (Z (alist-ref 'Z captn))) - (udat-captain-address-set! udata ipaddr) - (udat-captain-host-set! udata host) - (udat-captain-port-set! udata port) - (udat-captain-pid-set! udata pid) - (udat-captain-lease-set! udata (+ (current-seconds) 10)) - (let-values (((success pingtime)(ping udata (conc ipaddr ":" port)))) - (if success - udata - (begin - (print "Found unreachable captain at " ipaddr ":" port ", removing pkt") - (remove-captain-pkt udata captn) - (find-or-setup-captain udata)))) - (begin - (setup-as-captain udata) ;; this saves the thread to captain-thread and starts the thread - (find-or-setup-captain udata))))))) - -;; connect to a specific dbfile -;; - if already connected - return the dbowner host-port -;; - ask the captain who to talk to for this db -;; - put the entry in the dbowners hash as dbfile => host-port -;; -(define (connect udata dbfname dbtype) - (or (hash-table-ref/default (udat-dbowners udata) dbfname #f) - (let-values (((success dbowner-host-port)(get-db-owner udata dbfname dbtype))) - (if success - (begin - ;; just clobber the record, this is the new data no matter what - (hash-table-set! (udat-dbowners udata) dbfname dbowner-host-port) - dbowner-host-port) - #f)))) - -;; returns: success pingtime -;; -;; NOTE: causes the callee to store the info on this host along with the dbs this host currently owns -;; -(define (ping udata host-port) - (let* ((start (current-milliseconds)) - (cookie (make-cookie udata)) - (dbs (udat-my-dbs udata)) - (msg (string-intersperse dbs " ")) - (res (send udata host-port 'ping cookie msg retval: #t)) - (delta (- (current-milliseconds) start))) - (values (equal? res cookie) delta))) - -;; returns: success pingtime -;; -;; NOTE: causes all references to this worker to be wiped out in the -;; callee (ususally the captain) -;; -(define (goodbye-ping udata host-port) - (let* ((start (current-milliseconds)) - (cookie (make-cookie udata)) - (dbs (udat-my-dbs udata)) - (res (send udata host-port 'goodbye cookie "nomsg" retval: #t)) - (delta (- (current-milliseconds) start))) - (values (equal? res cookie) delta))) - -(define (goodbye-captain udata) - (let* ((host-port (udat-captain-host-port udata))) - (if host-port - (goodbye-ping udata host-port) - (values #f -1)))) - -(define (get-db-owner udata dbname dbtype) - (let* ((host-port (udat-captain-host-port udata))) - (if host-port - (let* ((cookie (make-cookie udata)) - (msg #f) ;; (conc dbname " " dbtype)) - (params `(,dbname ,dbtype)) - (res (send udata host-port 'db-owner cookie msg - params: params retval: #t))) - (match (string-split res) - ((retcookie owner-host-port) - (values (equal? retcookie cookie) owner-host-port)))) - (values #f -1)))) - -;; called in ulex-handler to dispatch work, called on the workers side -;; calls (proc params data) -;; returns result with cookie -;; -;; pdat is the info of the caller, used to send the result data -;; prockey is key into udat-handlers hash dereferencing a proc -;; procparam is a first param handed to proc - often to do further derefrencing -;; NOTE: params is intended to be a list of strings, encoding on data -;; is up to the user but data must be a single line -;; -(define (process-request udata pdat dbname cookie prockey procparam data) - (let* ((dbrec (ulex-open-db udata dbname)) ;; this will be a dbconn record, looks for in udata first - (proc (hash-table-ref udata prockey))) - (let* ((result (proc dbrec procparam data))) - result))) - -;; remote-request - send to remote to process in process-request -;; uconn comes from a call to connect and can be used instead of calling connect again -;; uconn is the host-port to call -;; we send dbname to the worker so they know which file to open -;; data must be a string with no newlines, it will be handed to the proc -;; at the remote site unchanged. It is up to the user to encode/decode it's contents -;; -;; rtype: immediate, read-only, normal, low-priority -;; -(define (remote-request udata uconn rtype dbname prockey procparam data) - (let* ((cookie (make-cookie udata))) - (send-receive udata uconn rtype cookie data `(,prockey procparam)))) - -(define (ulex-open-db udata dbname) - #f) - - -;;====================================================================== -;; Ulex db -;; -;; - track who is captain, lease expire time -;; - track who owns what db, lease -;; -;;====================================================================== - -;; -;; -(define (ulex-dbfname) - (let ((dbdir (conc (get-environment-variable "HOME") "/.ulex"))) - (if (not (file-exists? dbdir)) - (create-directory dbdir #t)) - (conc dbdir "/network.db"))) - -;; always goes in ~/.ulex/network.db -;; role is captain, adjutant, node -;; -(define (ulexdb-setup) - (let* ((dbfname (ulex-dbfname)) - (have-db (file-exists? dbfname)) - (db (sqlite3:open-database dbfname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (if (not have-db) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (stmt) - (if stmt (sqlite3:execute db stmt))) - `("CREATE TABLE IF NOT EXISTS nodes - (id INTEGER PRIMARY KEY, - role TEXT NOT NULL, - host TEXT NOT NULL, - port TEXT NOT NULL, - ipadr TEXT NOT NULL, - pid INTEGER NOT NULL, - zcard TEXT NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - lease_thru INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - "CREATE TRIGGER IF NOT EXISTS update_nodes_trigger AFTER UPDATE ON nodes - FOR EACH ROW - BEGIN - UPDATE nodes SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" - "CREATE TABLE IF NOT EXISTS dbs - (id INTEGER PRIMARY KEY, - dbname TEXT NOT NULL, - dbfile TEXT NOT NULL, - dbtype TEXT NOT NULL, - host_port TEXT NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - lease_thru INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - "CREATE TRIGGER IF NOT EXISTS update_dbs_trigger AFTER UPDATE ON dbs - FOR EACH ROW - BEGIN - UPDATE dbs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;"))))) - db)) - -(define (get-host-port-lease db dbfname) - (sqlite3:fold-row - (lambda (rem host-port lease-thru) - (list host-port lease-thru)) - #f db "SELECT host_port,lease_thru FROM dbs WHERE dbfile = ?" dbfname)) - -(define (register-captain db host ipadr port pid zcard #!key (lease 20)) - (let* ((dbfname (ulex-dbfname)) - (host-port (conc host ":" port))) - (sqlite3:with-transaction - db - (lambda () - (match (get-host-port-lease db dbfname) - ((host-port lease-thru) - (if (> (current-seconds) lease-thru) - (begin - (sqlite3:execute db "UPDATE dbs SET host_port=?,lease_thru=? WHERE dbname=?" - (conc host ":" port) - (+ (current-seconds) lease) - dbfname) - #t) - #f)) - (#f (sqlite3:execute db "INSERT INTO dbs (dbname,dbfile,dbtype,host_port,lease_thru) VALUES (?,?,?,?,?)" - "captain" dbfname "captain" host-port (+ (current-seconds) lease))) - (else (print "ERROR: Unrecognised result from fold-row") - (exit 1))))))) - +(module ulex + * + #;( + + ;; NOTE: looking for the handler proc - find the run-listener :) + + run-listener ;; (run-listener handler-proc [port]) => uconn + + ;; NOTE: handler-proc params; + ;; (handler-proc rem-host-port qrykey cmd params) + + send-receive ;; (send-receive uconn host-port cmd data) + + ;; NOTE: cmd can be any plain text symbol except for these; + ;; 'ping 'ack 'goodbye 'response + + set-work-handler ;; (set-work-handler proc) + + wait-and-close ;; (wait-and-close uconn) + + ulex-listener? + + ;; needed to get the interface:port that was automatically found + udat-port + udat-host-port + + ;; for testing only + ;; pp-uconn + + ;; parameters + work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct + return-method ;; parameter; 'mailbox, 'polling, 'direct + ) + +(import scheme + chicken.base + chicken.file + chicken.io + chicken.time + chicken.condition + chicken.string + chicken.sort + chicken.pretty-print + + address-info + mailbox + matchable + ;; queues + regex + regex-case + simple-exceptions + s11n + srfi-1 + srfi-18 + srfi-4 + srfi-69 + system-information + tcp6 + typed-records + ) + +;; udat struct, used by both caller and callee +;; instantiated as uconn by convention +;; +(defstruct udat + ;; the listener side + (port #f) + (host-port #f) + (socket #f) + ;; the peers + (peers (make-hash-table)) ;; host:port->peer + ;; work handling + (work-queue (make-mailbox)) + (work-proc #f) ;; set by user + (cnum 0) ;; cookie number + (mboxes (make-hash-table)) ;; for the replies + (avail-cmboxes '()) ;; list of ( . ) for re-use + ;; threads + (numthreads 10) + (cmd-thread #f) + (work-queue-thread #f) + (num-threads-running 0) + ) + +;; Parameters + +;; work-method: +(define work-method (make-parameter 'mailbox)) +;; mailbox - all rdat goes through mailbox +;; threads - all rdat immediately executed in new thread +;; direct - no queuing +;; + +;; return-method, return the result to waiting send-receive: +(define return-method (make-parameter 'mailbox)) +;; mailbox - create a mailbox and use it for passing returning results to send-receive +;; polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result +;; direct - no queuing, result is passed back in single tcp connection +;; + +;; ;; struct for keeping track of others we are talking to +;; ;; +;; (defstruct pdat +;; (host-port #f) +;; (conns '()) ;; list of pcon structs, pop one off when calling the peer +;; ) +;; +;; ;; struct for peer connections, keep track of expiration etc. +;; ;; +;; (defstruct pcon +;; (inp #f) +;; (oup #f) +;; (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59) +;; (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes +;; ) + +;;====================================================================== +;; listener +;;====================================================================== + +;; is uconn a ulex connector (listener) +;; +(define (ulex-listener? uconn) + (udat? uconn)) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (setup-listener uconn #!optional (port 4242)) + (handle-exceptions + exn + (if (< port 65535) + (setup-listener uconn (+ port 1)) + #f) + (connect-listener uconn port))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (udat-port-set! uconn port) + (udat-host-port-set! uconn (conc addr":"port)) + (udat-socket-set! uconn tlsn) + uconn)) + +;; run-listener does all the work of starting a listener in a thread +;; it then returns control +;; +(define (run-listener handler-proc #!optional (port-suggestion 4242)) + (let* ((uconn (make-udat))) + (udat-work-proc-set! uconn handler-proc) + (if (setup-listener uconn port-suggestion) + (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop")) + (th2 (make-thread (lambda () + (case (work-method) + ((mailbox limited) + (process-work-queue uconn)))) + "Ulex work queue processor"))) + ;; (tcp-buffer-size 2048) + (thread-start! th1) + (thread-start! th2) + (udat-cmd-thread-set! uconn th1) + (udat-work-queue-thread-set! uconn th2) + (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".") + uconn) + (assert #f "ERROR: run-listener called without proper setup.")))) + +(define (wait-and-close uconn) + (thread-join! (udat-cmd-thread uconn)) + (tcp-close (udat-socket uconn))) + +;;====================================================================== +;; peers and connections +;;====================================================================== + +(define *send-mutex* (make-mutex)) + +;; send structured data to recipient +;; +;; NOTE: qrykey is what was called the "cookie" previously +;; +;; retval tells send to expect and wait for return data (one line) and return it or time out +;; this is for ping where we don't want to necessarily have set up our own server yet. +;; +;; NOTE: see below for beginnings of code to allow re-use of tcp connections +;; - I believe (without substantial evidence) that re-using connections will +;; be beneficial ... +;; +(define (send udata host-port qrykey cmd params) + (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this + (isme #f #;(equal? host-port my-host-port)) ;; calling myself? + ;; dat is a self-contained work block that can be sent or handled locally + (dat (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds))))) + (cond + (isme (ulex-handler udata dat)) ;; no transmission needed + (else + (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? + exn + (message exn) + (begin + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP + (let-values (((inp oup)(tcp-connect host-port))) + (let ((res (if (and inp oup) + (begin + (serialize dat oup) + (close-output-port oup) + (deserialize inp) + ) + (begin + (print "ERROR: send called but no receiver has been setup. Please call setup first!") + #f)))) + (close-input-port inp) + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + res)))))))) ;; res will always be 'ack unless return-method is direct + +(define (send-via-polling uconn host-port cmd data) + (let* ((qrykey (make-cookie uconn)) + (sres (send uconn host-port qrykey cmd data))) + (case sres + ((ack) + (let loop ((start-time (current-milliseconds))) + (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout + (begin + (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data) + #f) + (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash + (if result ;; result is '(status . result-data) or #f for nothing yet + (begin + (hash-table-delete! (udat-mboxes uconn) qrykey) + (cdr result)) + (begin + (thread-sleep! 0.01) + (loop start-time))))))) + (else + (print "ULEX ERROR: Communication failed? sres="sres) + #f)))) + +(define (send-via-mailbox uconn host-port cmd data) + (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? + (qrykey (car cmbox)) + (mbox (cdr cmbox)) + (mbox-time (current-milliseconds)) + (sres (send uconn host-port qrykey cmd data))) ;; short res + (if (eq? sres 'ack) ;; BUG: change to be less than server:expiration-timeout? + (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread))) + #f + 120)) ;; timeout) + (mbox-timeout-result 'MBOX_TIMEOUT) + (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) + (mbox-receive-time (current-milliseconds))) + ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? + (hash-table-delete! (udat-mboxes uconn) qrykey) + (if (eq? res 'MBOX_TIMEOUT) + (begin + (print "WARNING: mbox timed out for query "cmd", with data "data + ", waiting for response from "host-port".") + + ;; here it might make sense to clean up connection records and force clean start? + ;; NO. The progam using ulex needs to do the reset. Right thing here is exception + + #f) ;; convert to raising exception? + res)) + (begin + (print "ERROR: Communication failed? Got "sres) + #f)))) + +;; send a request to the given host-port and register a mailbox in udata +;; wait for the mailbox data and return it +;; +(define (send-receive uconn host-port cmd data) + (let* ((start-time (current-milliseconds)) + (result (cond + ((member cmd '(ping goodbye)) ;; these are immediate + (send uconn host-port 'ping cmd data)) + ((eq? (work-method) 'direct) + ;; the result from send will be the actual result, not an 'ack + (send uconn host-port 'direct cmd data)) + (else + (case (return-method) + ((polling) + (send-via-polling uconn host-port cmd data)) + ((mailbox) + (send-via-mailbox uconn host-port cmd data)) + (else + (print "ULEX ERROR: unrecognised return-method "(return-method)".") + #f))))) + (duration (- (current-milliseconds) start-time))) + ;; this is ONLY for development and debugging. It will be removed once Ulex is stable. + (if (< 5000 duration) + (print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000))) + " seconds; "cmd", host-port="host-port", data="data)) + result)) + + +;;====================================================================== +;; responder side +;;====================================================================== + +;; take a request, rdat, and if not immediate put it in the work queue +;; +;; Reserved cmds; ack ping goodbye response +;; +(define (ulex-handler uconn rdat) + (assert (list? rdat) "FATAL: ulex-handler give rdat as not list") + (match rdat ;; (string-split controldat) + ((rem-host-port qrykey cmd params);; timedata) + ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params) + (case cmd + ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack) + ((ping) + ;; (print "Got Ping!") + ;; (add-to-work-queue uconn rdat) + 'ack) + ((goodbye) + ;; just clear out references to the caller. NOT COMPLETE + (add-to-work-queue uconn rdat) + 'ack) + ((response) ;; this is a result from remote processing, send it as mail ... + (case (return-method) + ((polling) + (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params)) + 'ack) + ((mailbox) + (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) + (if mbox + (begin + (mailbox-send! mbox params) ;; params here is our result + 'ack) + (begin + (print "ERROR: received result but no associated mbox for cookie "qrykey) + 'no-mbox-found)))) + (else (print "ULEX ERROR: unrecognised return-method "(return-method)) + 'bad-return-method))) + (else ;; generic request - hand it to the work queue + (add-to-work-queue uconn rdat) + 'ack))) + (else + (print "ULEX ERROR: bad rdat "rdat) + 'bad-rdat))) + +;; given an already set up uconn start the cmd-loop +;; +(define (ulex-cmd-loop uconn) + (let* ((serv-listener (udat-socket uconn)) + (listener (lambda () + (let loop ((state 'start)) + (let-values (((inp oup)(tcp-accept serv-listener))) + ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP + (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params) + (resp (ulex-handler uconn rdat))) + (serialize resp oup) + (close-input-port inp) + (close-output-port oup) + ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP + ) + (loop state)))))) + ;; start N of them + (let loop ((thnum 0) + (threads '())) + (if (< thnum 100) + (let* ((th (make-thread listener (conc "listener" thnum)))) + (thread-start! th) + (loop (+ thnum 1) + (cons th threads))) + (map thread-join! threads))))) + +;; add a proc to the cmd list, these are done symetrically (i.e. in all instances) +;; so that the proc can be dereferenced remotely +;; +(define (set-work-handler uconn proc) + (udat-work-proc-set! uconn proc)) + +;;====================================================================== +;; work queues - this is all happening on the listener side +;;====================================================================== + +;; rdat is (rem-host-port qrykey cmd params) + +(define (add-to-work-queue uconn rdat) + #;(queue-add! (udat-work-queue uconn) rdat) + (case (work-method) + ((threads) + (thread-start! (make-thread (lambda () + (do-work uconn rdat)) + "worker thread"))) + ((mailbox) + (mailbox-send! (udat-work-queue uconn) rdat)) + ((direct) + (do-work uconn rdat)) + (else + (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.") + (mailbox-send! (udat-work-queue uconn) rdat)))) + +;; move the logic to return the result somewhere else? +;; +(define (do-work uconn rdat) + (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change + ;; put this following into a do-work procedure + (match rdat + ((rem-host-port qrykey cmd params) + (let* ((start-time (current-milliseconds)) + (result (proc rem-host-port qrykey cmd params)) + (end-time (current-milliseconds)) + (run-time (- end-time start-time))) + (case (work-method) + ((direct) result) + (else + (if (> run-time 1000)(print "ULEX: Warning, work "cmd", "params" done in "run-time" ms")) + ;; send 'response as cmd and result as params + (send uconn rem-host-port qrykey 'response result) ;; could check for ack + (let* ((duration (- (current-milliseconds) end-time))) + (if (> duration 500)(print "ULEX: Warning, response sent back to "rem-host-port" for "qrykey" in "duration"ms"))))))) + (MBOX_TIMEOUT 'do-work-timeout) + (else + (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params"))))) + +;; NEW APPROACH: +;; +(define (process-work-queue uconn) + (let ((wqueue (udat-work-queue uconn)) + (proc (udat-work-proc uconn)) + (numthr (udat-numthreads uconn))) + (let loop ((thnum 1) + (threads '())) + (let ((thlst (cons (make-thread (lambda () + (let work-loop () + (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT))) + (do-work uconn rdat)) + (work-loop))) + (conc "work thread " thnum)) + threads))) + (if (< thnum numthr) + (loop (+ thnum 1) + thlst) + (begin + (print "ULEX: Starting "(length thlst)" worker threads.") + (map thread-start! thlst) + (print "ULEX: Threads started. Joining all.") + (map thread-join! thlst))))))) + +;; below was to enable re-use of connections. This seems non-trivial so for +;; now lets open on each call +;; +;; ;; given host-port get or create peer struct +;; ;; +;; (define (udat-get-peer uconn host-port) +;; (or (hash-table-ref/default (udat-peers uconn) host-port #f) +;; ;; no peer, so create pdat and init it +;; +;; ;; NEED stack of connections, pop and use; inp, oup, +;; ;; creation_time (remove and create new if over 24hrs old +;; ;; +;; (let ((pdat (make-pdat host-port: host-port))) +;; (hash-table-set! (udat-peers uconn) host-port pdat) +;; pdat))) +;; +;; ;; is pcon alive +;; +;; ;; given host-port and pdat get a pcon +;; ;; +;; (define (pdat-get-pcon pdat host-port) +;; (let loop ((conns (pdat-conns pdat))) +;; (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later +;; (init-pcon (make-pcon)) +;; (let* ((conn (pop conns))) +;; +;; ;; given host-port get a pcon struct +;; ;; +;; (define (udat-get-pcon + +;;====================================================================== +;; misc utils +;;====================================================================== + +(define (make-cookie uconn) + (let ((newcnum (+ (udat-cnum uconn) 1))) + (udat-cnum-set! uconn newcnum) + (conc (udat-host-port uconn) ":" + newcnum))) + +;; cookie/mboxes + +;; we store each mbox with a cookie ( . ) +;; +(define (get-cmbox uconn) + (if (null? (udat-avail-cmboxes uconn)) + (let ((cookie (make-cookie uconn)) + (mbox (make-mailbox))) + (hash-table-set! (udat-mboxes uconn) cookie mbox) + `(,cookie . ,mbox)) + (let ((cmbox (car (udat-avail-cmboxes uconn)))) + (udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn))) + cmbox))) + +(define (put-cmbox uconn cmbox) + (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn)))) + +(define (pp-uconn uconn) + (pp (udat->alist uconn))) + + ;;====================================================================== ;; network utilities ;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this (define (rate-ip ipaddr) (regex-case ipaddr ( "^127\\..*" _ 0 ) ( "^(10\\.0|192\\.168)\\..*" _ 1 ) @@ -354,1899 +545,26 @@ ;; Change this to bias for addresses with a reasonable broadcast value? ;; (define (ip-pref-less? a b) (> (rate-ip a) (rate-ip b))) - (define (get-my-best-address) - (let ((all-my-addresses (get-all-ips)) - ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) - ) + (let ((all-my-addresses (get-all-ips))) (cond ((null? all-my-addresses) (get-host-name)) ;; no interfaces? ((eq? (length all-my-addresses) 1) (car all-my-addresses)) ;; only one to choose from, just go with it - (else - (car (sort all-my-addresses ip-pref-less?))) - ;; (else - ;; (ip->string (car (filter (lambda (x) ;; take any but 127. - ;; (not (eq? (u8vector-ref x 0) 127))) - ;; all-my-addresses)))) - - ))) + (car (sort all-my-addresses ip-pref-less?)))))) (define (get-all-ips-sorted) (sort (get-all-ips) ip-pref-less?)) (define (get-all-ips) - (map ip->string (vector->list - (hostinfo-addresses - (host-information (current-hostname)))))) - -(define (udat-my-host-port udata) - (if (and (udat-my-address udata)(udat-my-port udata)) - (conc (udat-my-address udata) ":" (udat-my-port udata)) - #f)) - -(define (udat-captain-host-port udata) - (if (and (udat-captain-address udata)(udat-captain-port udata)) - (conc (udat-captain-address udata) ":" (udat-captain-port udata)) - #f)) - -(define (udat-get-peer udata host-port) - (hash-table-ref/default (udat-peers udata) host-port #f)) - -;; struct for keeping track of others we are talking to - -(defstruct peer - (addr-port #f) - (hostname #f) - (pid #f) - ;; (inp #f) - ;; (oup #f) - (dbs '()) ;; list of databases this peer is currently handling - ) - -(defstruct work - (peer-dat #f) - (handlerkey #f) - (qrykey #f) - (data #f) - (start (current-milliseconds))) - -#;(defstruct dbowner - (pdat #f) - (last-update (current-seconds))) - -;;====================================================================== -;; Captain functions -;;====================================================================== - -;; NB// This needs to be started in a thread -;; -;; setup to be a captain -;; - local server MUST be started already -;; - create pkt -;; - start server port handler -;; -(define (setup-as-captain udata) - (if (create-captain-pkt udata) - (let* ((my-addr (udat-my-address udata)) - (my-port (udat-my-port udata)) - (th (make-thread (lambda () - (ulex-handler-loop udata)) "Captain handler"))) - (udat-handler-thread-set! udata th) - (udat-captain-address-set! udata my-addr) - (udat-captain-port-set! udata my-port) - (thread-start! th)) - (begin - (print "ERROR: failed to create captain pkt") - #f))) - -;; given a pkts dir read -;; -(define (get-all-captain-pkts udata) - (let* ((pktsdir (let ((d (udat-cpkts-dir udata))) - (if (file-exists? d) - d - (begin - (create-directory d #t) - d)))) - (all-pkt-files (glob (conc pktsdir "/*.pkt"))) - (pkt-spec (udat-cpkt-spec udata))) - (map (lambda (pkt-file) - (read-pkt->alist pkt-file pktspec: pkt-spec)) - all-pkt-files))) - -;; sort by D then Z, return one, choose the oldest then -;; differentiate if needed using the Z key -;;l -(define (get-winning-pkt pkts) - (if (null? pkts) - #f - (car (sort pkts (lambda (a b) - (let ((ad (string->number (alist-ref 'D a))) - (bd (string->number (alist-ref 'D b)))) - (if (eq? a b) - (let ((az (alist-ref 'Z a)) - (bz (alist-ref 'Z b))) - (string>=? az bz)) - (> ad bd)))))))) - -;; put the host, ip, port and pid into a pkt in -;; the captain pkts dir -;; - assumes user has already fired up a server -;; which will be in the udata struct -;; -(define (create-captain-pkt udata) - (if (not (udat-serv-listener udata)) - (begin - (print "ERROR: create-captain-pkt called with out a listener") - #f) - (let* ((pktdat `((port . ,(udat-my-port udata)) - (host . ,(udat-my-hostname udata)) - (ipaddr . ,(udat-my-address udata)) - (pid . ,(udat-my-pid udata)))) - (pktdir (udat-cpkts-dir udata)) - (pktspec (udat-cpkt-spec udata)) - ) - (udat-my-cpkt-key-set! - udata - (write-alist->pkt - pktdir - pktdat - pktspec: pktspec - ptype: 'captain)) - (udat-my-cpkt-key udata)))) - -;; remove pkt associated with captn (the Z key .pkt) -;; -(define (remove-captain-pkt udata captn) - (let ((Z (alist-ref 'Z captn)) - (cpktdir (udat-cpkts-dir udata))) - (delete-file* (conc cpktdir "/" Z ".pkt")))) - -;; call all known peers and tell them to delete their info on the captain -;; thus forcing them to re-read pkts and connect to a new captain -;; call this when the captain needs to exit and if an older captain is -;; detected. Due to delays in sending file meta data in NFS multiple -;; captains can be initiated in a "Storm of Captains", book soon to be -;; on Amazon -;; -(define (drop-captain udata) - (let* ((peers (hash-table-keys (udat-peers udata))) - (cookie (make-cookie udata))) - (for-each - (lambda (host-port) - (send udata host-port 'dropcaptain cookie "nomsg" retval: #t)) - peers))) - -;;====================================================================== -;; server primitives -;;====================================================================== - -(define (make-cookie udata) - (let ((newcnum (+ (udat-cnum udata) 1))) - (udat-cnum-set! udata newcnum) - (conc (udat-my-address udata) ":" - (udat-my-port udata) "-" - (udat-my-pid udata) "-" - newcnum))) - -;; create a tcp listener and return a populated udat struct with -;; my port, address, hostname, pid etc. -;; return #f if fail to find a port to allocate. -;; -;; if udata-in is #f create the record -;; if there is already a serv-listener return the udata -;; -(define (start-server-find-port udata-in #!optional (port 4242)) - (let ((udata (or udata-in (make-udat)))) - (if (udat-serv-listener udata) ;; TODO - add check that the listener is alive and ready? - udata - (handle-exceptions - exn - (if (< port 65535) - (start-server-find-port udata (+ port 1)) - #f) - (connect-server udata port))))) - -(define (connect-server udata port) - ;; (tcp-listener-socket LISTENER)(socket-name so) - ;; sockaddr-address, sockaddr-port, sockaddr->string - (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) - (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) - (udat-my-address-set! udata addr) - (udat-my-port-set! udata port) - (udat-my-hostname-set! udata (get-host-name)) - (udat-serv-listener-set! udata tlsn) - udata)) - -(define (get-peer-dat udata host-port #!optional (hostname #f)(pid #f)) - (let* ((pdat (or (udat-get-peer udata host-port) - (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE SPECIFIC - exn - #f - (let ((npdat (make-peer addr-port: host-port))) - (if hostname (peer-hostname-set! npdat hostname)) - (if pid (peer-pid-set! npdat pid)) - npdat))))) - pdat)) - -;; send structured data to recipient -;; -;; NOTE: qrykey is what was called the "cookie" previously -;; -;; retval tells send to expect and wait for return data (one line) and return it or time out -;; this is for ping where we don't want to necessarily have set up our own server yet. -;; -(define (send udata host-port handler qrykey data - #!key (hostname #f)(pid #f)(params '())(retval #f)) - (let* ((my-host-port (udat-my-host-port udata)) - (isme (equal? host-port my-host-port)) ;; am I calling - ;; myself? - (dat (list - handler ;; " " - my-host-port ;; " " - (udat-my-pid udata) ;; " " - qrykey - params ;;(if (null? params) "" (conc " " - ;;(string-intersperse params " "))) - ))) - ;; (print "send isme is " (if isme "true!" "false!") ", - ;; my-host-port: " my-host-port ", host-port: " host-port) - (if isme - (ulex-handler udata dat data) - (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE - ;; SPECIFIC - exn - #f - (let-values (((inp oup)(tcp-connect host-port))) - ;; - ;; CONTROL LINE: - ;; handlerkey host:port pid qrykey params ... - ;; - (let ((res - (if (and inp oup) - (let* () - (if my-host-port - (begin - (write dat oup) - (write data oup) ;; send as sexpr - ;; (print "Sent dat: " dat " data: " data) - (if retval - (read inp) - #t)) - (begin - (print "ERROR: send called but no receiver has been setup. Please call setup first!") - #f)) - ;; NOTE: DO NOT BE TEMPTED TO LOOK AT ANY DATA ON INP HERE! - ;; (there is a listener for handling that) - ) - #f))) ;; #f means failed to connect and send - (close-input-port inp) - (close-output-port oup) - res)))))) - -;; send a request to the given host-port and register a mailbox in udata -;; wait for the mailbox data and return it -;; -(define (send-receive udata host-port handler qrykey data #!key (hostname #f)(pid #f)(params '())(timeout 20)) - (let ((mbox (make-mailbox)) - (mbox-time (current-milliseconds)) - (mboxes (udat-mboxes udata))) - (hash-table-set! mboxes qrykey mbox) - (if (send udata host-port handler qrykey data hostname: hostname pid: pid params: params) - (let* ((mbox-timeout-secs timeout) - (mbox-timeout-result 'MBOX_TIMEOUT) - (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) - (mbox-receive-time (current-milliseconds))) - (hash-table-delete! mboxes qrykey) - (if (eq? res 'MBOX_TIMEOUT) - #f - res)) - #f))) ;; #f means failed to communicate - -;; -(define (ulex-handler udata controldat data) - (print "controldat: " controldat " data: " data) - (match controldat ;; (string-split controldat) - ((handlerkey host-port pid qrykey params ...) - ;; (print "handlerkey: " handlerkey " host-port: " host-port " pid: " pid " qrykey: " qrykey " params: " params) - (case handlerkey ;; (string->symbol handlerkey) - ((ack)(print "Got ack!")) - ((ping) ;; special case - return result immediately on the same connection - (let* ((proc (hash-table-ref/default (udat-handlers udata) 'ping #f)) - (val (if proc (proc) "gotping")) - (peer (make-peer addr-port: host-port pid: pid)) - (dbshash (udat-dbowners udata))) - (peer-dbs-set! peer params) ;; params for ping is list of dbs owned by pinger - (for-each (lambda (dbfile) - (hash-table-set! dbshash dbfile host-port)) ;; WRONG? - params) ;; register each db in the dbshash - (if (not (hash-table-exists? (udat-peers udata) host-port)) - (hash-table-set! (udat-peers udata) host-port peer)) ;; save the details of this caller in peers - qrykey)) ;; End of ping - ((goodbye) - ;; remove all traces of the caller in db ownership etc. - (let* ((peer (hash-table-ref/default (udat-peers udata) host-port #f)) - (dbs (if peer (peer-dbs peer) '())) - (dbshash (udat-dbowners udata))) - (for-each (lambda (dbfile)(hash-table-delete! dbshash dbfile)) dbs) - (hash-table-delete! (udat-peers udata) host-port) - qrykey)) - ((dropcaptain) - ;; remove all traces of the captain - (udat-captain-address-set! udata #f) - (udat-captain-host-set! udata #f) - (udat-captain-port-set! udata #f) - (udat-captain-pid-set! udata #f) - qrykey) - ((rucaptain) ;; remote is asking if I'm the captain - (if (udat-my-cpkt-key udata) "yes" "no")) - ((db-owner) ;; given a db name who do I send my queries to - ;; look up the file in handlers, if have an entry ping them to be sure - ;; they are still alive and then return that host:port. - ;; if no handler found or if the ping fails pick from peers the oldest that - ;; is managing the fewest dbs - (match params - ((dbfile dbtype) - (let* ((owner-host-port (hash-table-ref/default (udat-dbowners udata) dbfile #f))) - (if owner-host-port - (conc qrykey " " owner-host-port) - (let* ((pdat (or (hash-table-ref/default (udat-peers udata) host-port #f) ;; no owner - caller gets to own it! - (make-peer addr-port: host-port pid: pid dbs: `(,dbfile))))) - (hash-table-set! (udat-peers udata) host-port pdat) - (hash-table-set! (udat-dbowners udata) dbfile host-port) - (conc qrykey " " host-port))))) - (else (conc qrykey " BADDATA")))) - ;; for work items: - ;; handler is one of; immediate, read-only, read-write, high-priority - ((immediate read-only normal low-priority) ;; do this work immediately - ;; host-port (caller), pid (caller), qrykey (cookie), params <= all from first line - ;; data => a single line encoded however you want, or should I build json into it? - (print "handlerkey=" handlerkey) - (let* ((pdat (get-peer-dat udata host-port))) - (match params ;; dbfile prockey procparam - ((dbfile prockey procparam) - (case handlerkey - ((immediate read-only) - (process-request udata pdat dbfile qrykey prockey procparam data)) - ((normal low-priority) ;; split off later and add logic to support low priority - (add-to-work-queue udata pdat dbfile qrykey prockey procparam data)) - (else - #f))) - (else - (print "INFO: params=" params " handlerkey=" handlerkey " controldat=" controldat) - #f)))) - (else - ;; (add-to-work-queue udata (get-peer-dat udata host-port) handlerkey qrykey data) - #f))) - (else - (print "BAD DATA? controldat=" controldat " data=" data) - #f)));; handles the incoming messages and dispatches to queues - -;; -(define (ulex-handler-loop udata) - (let* ((serv-listener (udat-serv-listener udata))) - ;; data comes as two lines - ;; handlerkey resp-addr:resp-port hostname pid qrykey [dbpath/dbfile.db] - ;; data - (let loop ((state 'start)) - (let-values (((inp oup)(tcp-accept serv-listener))) - (let* ((controldat (read inp)) - (data (read inp)) - (resp (ulex-handler udata controldat data))) - (if resp (write resp oup)) - (close-input-port inp) - (close-output-port oup)) - (loop state))))) - -;; add a proc to the handler list, these are done symetrically (i.e. in all instances) -;; so that the proc can be dereferenced remotely -;; -(define (register-handler udata key proc) - (hash-table-set! (udat-handlers udata) key proc)) - - -;;====================================================================== -;; work queues -;;====================================================================== - -(define (add-to-work-queue udata peer-dat handlerkey qrykey data) - (let ((wdat (make-work peer-dat: peer-dat handlerkey: handlerkey qrykey: qrykey data: data))) - (if (udat-busy udata) - (queue-add! (udat-work-queue udata) wdat) - (process-work udata wdat)) ;; passing in wdat tells process-work to first process the passed in wdat - )) - -(define (do-work udata wdat) - #f) - -(define (process-work udata #!optional wdat) - (if wdat (do-work udata wdat)) ;; process wdat - (let ((wqueue (udat-work-queue udata))) - (if (not (queue-empty? wqueue)) - (let loop ((wd (queue-remove! wqueue))) - (do-work udata wd) - (if (not (queue-empty? wqueue)) - (loop (queue-remove! wqueue))))))) - -;;====================================================================== -;; Generic db handling -;; setup a inmem db instance -;; open connection to on-disk db -;; sync on-disk db to inmem -;; get lock in on-disk db for dbowner of this db -;; put sync-proc, init-proc, on-disk handle, inmem handle in dbconn stuct -;; return the stuct -;;====================================================================== - -(defstruct dbconn - (fname #f) - (inmem #f) - (conn #f) - (sync #f) ;; sync proc - (init #f) ;; init proc - (lastsync (current-seconds)) - ) - -(defstruct dbinfo - (initproc #f) - (syncproc #f)) - -;; open inmem and disk database -;; init with initproc -;; return db struct -;; -;; appname; megatest, ulex or something else. -;; -(define (setup-db-connection udata fname-in appname dbtype) - (let* ((is-ulex (eq? appname 'ulex)) - (dbinf (if is-ulex ;; ulex is a built-in special case - (make-dbinfo initproc: ulexdb-init syncproc: ulexdb-sync) - (hash-table-ref/default (udat-dbtypes udata) dbtype #f))) - (initproc (dbinfo-initproc dbinf)) - (syncproc (dbinfo-syncproc dbinf)) - (fname (if is-ulex - (conc (udat-ulex-dir udata) "/ulex.db") - fname-in)) - (inmem-db (open-and-initdb udata #f 'inmem (dbinfo-initproc dbinf))) - (disk-db (open-and-initdb udata fname 'disk (dbinfo-initproc dbinf)))) - (make-dbconn inmem: inmem-db conn: disk-db sync: syncproc init: initproc))) - -;; dest='inmem or 'disk -;; -(define (open-and-initdb udata filename dest init-proc) - (let* ((inmem (eq? dest 'inmem)) - (dbfile (if inmem - ":INMEM:" - filename)) - (dbexists (if inmem #t (file-exists? dbfile))) - (db (sqlite3:open-database dbfile))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (if (not dbexists) - (init-proc db)) - db)) - - -;;====================================================================== -;; Previous Ulex db stuff -;;====================================================================== - -(define (ulexdb-init db inmem) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (stmt) - (if stmt (sqlite3:execute db stmt))) - `("CREATE TABLE IF NOT EXISTS processes - (id INTEGER PRIMARY KEY, - host TEXT NOT NULL, - ipadr TEXT NOT NULL, - port INTEGER NOT NULL, - pid INTEGER NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - (if inmem - "CREATE TRIGGER IF NOT EXISTS update_proces_trigger AFTER UPDATE ON processes - FOR EACH ROW - BEGIN - UPDATE processes SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" - #f)))))) - -;; open databases, do initial sync -(define (ulexdb-sync dbconndat udata) - #f) - - -) ;; END OF ULEX - - -;;; ;;====================================================================== -;;; ;; D E B U G H E L P E R S -;;; ;;====================================================================== -;;; -;;; (define (dbg> . args) -;;; (with-output-to-port (current-error-port) -;;; (lambda () -;;; (apply print "dbg> " args)))) -;;; -;;; (define (debug-pp . args) -;;; (if (get-environment-variable "ULEX_DEBUG") -;;; (with-output-to-port (current-error-port) -;;; (lambda () -;;; (apply pp args))))) -;;; -;;; (define *default-debug-port* (current-error-port)) -;;; -;;; (define (sdbg> fn stage-name stage-start stage-end start-time . message) -;;; (if (get-environment-variable "ULEX_DEBUG") -;;; (with-output-to-port *default-debug-port* -;;; (lambda () -;;; (apply print "ulex:" fn " " stage-name " took " (- (if stage-end stage-end (current-milliseconds)) stage-start) " ms. " -;;; (if start-time -;;; (conc "total time " (- (current-milliseconds) start-time) -;;; " ms.") -;;; "") -;;; message -;;; ))))) - -;;====================================================================== -;; M A C R O S -;;====================================================================== -;; iup callbacks are not dumping the stack, this is a work-around -;; - -;; Some of these routines use: -;; -;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html -;; -;; Syntax for defining macros in a simple style similar to function definiton, -;; when there is a single pattern for the argument list and there are no keywords. -;; -;; (define-simple-syntax (name arg ...) body ...) -;; -;; -;; (define-syntax define-simple-syntax -;; (syntax-rules () -;; ((_ (name arg ...) body ...) -;; (define-syntax name (syntax-rules () ((name arg ...) (begin body ...))))))) -;; -;; (define-simple-syntax (catch-and-dump proc procname) -;; (handle-exceptions -;; exn -;; (begin -;; (print-call-chain (current-error-port)) -;; (with-output-to-port (current-error-port) -;; (lambda () -;; (print ((condition-property-accessor 'exn 'message) exn)) -;; (print "Callback error in " procname) -;; (print "Full condition info:\n" (condition->list exn))))) -;; (proc))) -;; -;; -;;====================================================================== -;; R E C O R D S -;;====================================================================== - -;;; ;; information about me as a server -;;; ;; -;;; (defstruct area -;;; ;; about this area -;;; (useportlogger #f) -;;; (lowport 32768) -;;; (server-type 'auto) ;; auto=create up to five servers/pkts, main=create pkts, passive=no pkt (unless there are no pkts at all) -;;; (conn #f) -;;; (port #f) -;;; (myaddr (get-my-best-address)) -;;; pktid ;; get pkt from hosts table if needed -;;; pktfile -;;; pktsdir -;;; dbdir -;;; (dbhandles (make-hash-table)) ;; fname => list-of-dbh, NOTE: Should really never need more than one? -;;; (mutex (make-mutex)) -;;; (rtable (make-hash-table)) ;; registration table of available actions -;;; (dbs (make-hash-table)) ;; filename => random number, used for choosing what dbs I serve -;;; ;; about other servers -;;; (hosts (make-hash-table)) ;; key => hostdat -;;; (hoststats (make-hash-table)) ;; key => alist of fname => ( qcount . qtime ) -;;; (reqs (make-hash-table)) ;; uri => queue -;;; ;; work queues -;;; (wqueues (make-hash-table)) ;; fname => qdat -;;; (stats (make-hash-table)) ;; fname => totalqueries -;;; (last-srvup (current-seconds)) ;; last time we updated the known servers -;;; (cookie2mbox (make-hash-table)) ;; map cookie for outstanding request to mailbox of awaiting call -;;; (ready #f) -;;; (health (make-hash-table)) ;; ipaddr:port => num failed pings since last good ping -;;; ) -;;; -;;; ;; host stats -;;; ;; -;;; (defstruct hostdat -;;; (pkt #f) -;;; (dbload (make-hash-table)) ;; "dbfile.db" => queries/min -;;; (hostload #f) ;; normalized load ( 5min load / numcpus ) -;;; ) -;;; -;;; ;; dbdat -;;; ;; -;;; (defstruct dbdat -;;; (dbh #f) -;;; (fname #f) -;;; (write-access #f) -;;; (sths (make-hash-table)) ;; hash mapping query strings to handles -;;; ) -;;; -;;; ;; qdat -;;; ;; -;;; (defstruct qdat -;;; (writeq (make-queue)) -;;; (readq (make-queue)) -;;; (rwq (make-queue)) -;;; (logq (make-queue)) ;; do we need a queue for logging? yes, if we use sqlite3 db for logging -;;; (osshort (make-queue)) -;;; (oslong (make-queue)) -;;; (misc (make-queue)) ;; used for things like ping-full -;;; ) -;;; -;;; ;; calldat -;;; ;; -;;; (defstruct calldat -;;; (ctype 'dbwrite) -;;; (obj #f) ;; this would normally be an SQL statement e.g. SELECT, INSERT etc. -;;; (rtime (current-milliseconds))) -;;; -;;; ;; make it a global? Well, it is local to area module -;;; -;;; (define *pktspec* -;;; `((server (hostname . h) -;;; (port . p) -;;; (pid . i) -;;; (ipaddr . a) -;;; ) -;;; (data (hostname . h) ;; sender hostname -;;; (port . p) ;; sender port -;;; (ipaddr . a) ;; sender ip -;;; (hostkey . k) ;; sending host key - store info at server under this key -;;; (servkey . s) ;; server key - this needs to match at server end or reject the msg -;;; (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json -;;; (data . d) ;; base64 encoded slln data -;;; ))) -;;; -;;; ;; work item -;;; ;; -;;; (defstruct witem -;;; (rhost #f) ;; return host -;;; (ripaddr #f) ;; return ipaddr -;;; (rport #f) ;; return port -;;; (servkey #f) ;; the packet representing the client of this workitem, used by final send-message -;;; (rdat #f) ;; the request - usually an sql query, type is rdat -;;; (action #f) ;; the action: immediate, dbwrite, dbread,oslong, osshort -;;; (cookie #f) ;; cookie id for response -;;; (data #f) ;; the data payload, i.e. parameters -;;; (result #f) ;; the result from processing the data -;;; (caller #f)) ;; the calling peer according to rpc itself -;;; -;;; (define (trim-pktid pktid) -;;; (if (string? pktid) -;;; (substring pktid 0 4) -;;; "nopkt")) -;;; -;;; (define (any->number num) -;;; (cond -;;; ((number? num) num) -;;; ((string? num) (string->number num)) -;;; (else num))) -;;; -;;; (use trace) -;;; (trace-call-sites #t) -;;; -;;; ;;====================================================================== -;;; ;; D A T A B A S E H A N D L I N G -;;; ;;====================================================================== -;;; -;;; ;; look in dbhandles for a db, return it, else return #f -;;; ;; -;;; (define (get-dbh acfg fname) -;;; (let ((dbh-lst (hash-table-ref/default (area-dbhandles acfg) fname '()))) -;;; (if (null? dbh-lst) -;;; (begin -;;; ;; (print "opening db for " fname) -;;; (open-db acfg fname)) ;; Note that the handles get put back in the queue in the save-dbh calls -;;; (let ((rem-lst (cdr dbh-lst))) -;;; ;; (print "re-using saved connection for " fname) -;;; (hash-table-set! (area-dbhandles acfg) fname rem-lst) -;;; (car dbh-lst))))) -;;; -;;; (define (save-dbh acfg fname dbdat) -;;; ;; (print "saving dbh for " fname) -;;; (hash-table-set! (area-dbhandles acfg) fname (cons dbdat (hash-table-ref/default (area-dbhandles acfg) fname '())))) -;;; -;;; ;; open the database, if never before opened init it. put the handle in the -;;; ;; open db's hash table -;;; ;; returns: the dbdat -;;; ;; -;;; (define (open-db acfg fname) -;;; (let* ((fullname (conc (area-dbdir acfg) "/" fname)) -;;; (exists (file-exists? fullname)) -;;; (write-access (if exists -;;; (file-write-access? fullname) -;;; (file-write-access? (area-dbdir acfg)))) -;;; (db (sqlite3:open-database fullname)) -;;; (handler (sqlite3:make-busy-timeout 136000)) -;;; ) -;;; (sqlite3:set-busy-handler! db handler) -;;; (sqlite3:execute db "PRAGMA synchronous = 0;") -;;; (if (not exists) ;; need to init the db -;;; (if write-access -;;; (let ((isql (get-rsql acfg 'dbinitsql))) ;; get the init sql statements -;;; ;; (sqlite3:with-transaction -;;; ;; db -;;; ;; (lambda () -;;; (if isql -;;; (for-each -;;; (lambda (sql) -;;; (sqlite3:execute db sql)) -;;; isql))) -;;; (print "ERROR: no write access to " (area-dbdir acfg)))) -;;; (make-dbdat dbh: db fname: fname write-access: write-access))) -;;; -;;; ;; This is a low-level command to retrieve or to prepare, save and return a prepared statment -;;; ;; you must extract the db handle -;;; ;; -;;; (define (get-sth db cache stmt) -;;; (if (hash-table-exists? cache stmt) -;;; (begin -;;; ;; (print "Reusing cached stmt for " stmt) -;;; (hash-table-ref/default cache stmt #f)) -;;; (let ((sth (sqlite3:prepare db stmt))) -;;; (hash-table-set! cache stmt sth) -;;; ;; (print "prepared stmt for " stmt) -;;; sth))) -;;; -;;; ;; a little more expensive but does all the tedious deferencing - only use if you don't already -;;; ;; have dbdat and db sitting around -;;; ;; -;;; (define (full-get-sth acfg fname stmt) -;;; (let* ((dbdat (get-dbh acfg fname)) -;;; (db (dbdat-dbh dbdat)) -;;; (sths (dbdat-sths dbdat))) -;;; (get-sth db sths stmt))) -;;; -;;; ;; write to a db -;;; ;; acfg: area data -;;; ;; rdat: request data -;;; ;; hdat: (host . port) -;;; ;; -;;; ;; (define (dbwrite acfg rdat hdat data-in) -;;; ;; (let* ((dbname (car data-in)) -;;; ;; (dbdat (get-dbh acfg dbname)) -;;; ;; (db (dbdat-dbh dbdat)) -;;; ;; (sths (dbdat-sths dbdat)) -;;; ;; (stmt (calldat-obj rdat)) -;;; ;; (sth (get-sth db sths stmt)) -;;; ;; (data (cdr data-in))) -;;; ;; (print "dbname: " dbname " acfg: " acfg " rdat: " (calldat->alist rdat) " hdat: " hdat " data: " data) -;;; ;; (print "dbdat: " (dbdat->alist dbdat)) -;;; ;; (apply sqlite3:execute sth data) -;;; ;; (save-dbh acfg dbname dbdat) -;;; ;; #t -;;; ;; )) -;;; -;;; (define (finalize-all-db-handles acfg) -;;; (let* ((dbhandles (area-dbhandles acfg)) ;; dbhandles is hash of fname ==> dbdat -;;; (num 0)) -;;; (for-each -;;; (lambda (area-name) -;;; (print "Closing handles for " area-name) -;;; (let ((dbdats (hash-table-ref/default dbhandles area-name '()))) -;;; (for-each -;;; (lambda (dbdat) -;;; ;; first close all statement handles -;;; (for-each -;;; (lambda (sth) -;;; (sqlite3:finalize! sth) -;;; (set! num (+ num 1))) -;;; (hash-table-values (dbdat-sths dbdat))) -;;; ;; now close the dbh -;;; (set! num (+ num 1)) -;;; (sqlite3:finalize! (dbdat-dbh dbdat))) -;;; dbdats))) -;;; (hash-table-keys dbhandles)) -;;; (print "FINALIZED " num " dbhandles"))) -;;; -;;; ;;====================================================================== -;;; ;; W O R K Q U E U E H A N D L I N G -;;; ;;====================================================================== -;;; -;;; (define (register-db-as-mine acfg dbname) -;;; (let ((ht (area-dbs acfg))) -;;; (if (not (hash-table-ref/default ht dbname #f)) -;;; (hash-table-set! ht dbname (random 10000))))) -;;; -;;; (define (work-queue-add acfg fname witem) -;;; (let* ((work-queue-start (current-milliseconds)) -;;; (action (witem-action witem)) ;; NB the action is the index into the rdat actions -;;; (qdat (or (hash-table-ref/default (area-wqueues acfg) fname #f) -;;; (let ((newqdat (make-qdat))) -;;; (hash-table-set! (area-wqueues acfg) fname newqdat) -;;; newqdat))) -;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f))) -;;; (if rdat -;;; (queue-add! -;;; (case (calldat-ctype rdat) -;;; ((dbwrite) (register-db-as-mine acfg fname)(qdat-writeq qdat)) -;;; ((dbread) (register-db-as-mine acfg fname)(qdat-readq qdat)) -;;; ((dbrw) (register-db-as-mine acfg fname)(qdat-rwq qdat)) -;;; ((oslong) (qdat-oslong qdat)) -;;; ((osshort) (qdat-osshort qdat)) -;;; ((full-ping) (qdat-misc qdat)) -;;; (else -;;; (print "ERROR: no queue for " action ". Adding to dbwrite queue.") -;;; (qdat-writeq qdat))) -;;; witem) -;;; (case action -;;; ((full-ping)(qdat-misc qdat)) -;;; (else -;;; (print "ERROR: No action " action " was registered")))) -;;; (sdbg> "work-queue-add" "queue-add" work-queue-start #f #f) -;;; #t)) ;; for now, simply return #t to indicate request got to the queue -;;; -;;; (define (doqueue acfg q fname dbdat dbh) -;;; ;; (print "doqueue: " fname) -;;; (let* ((start-time (current-milliseconds)) -;;; (qlen (queue-length q))) -;;; (if (> qlen 1) -;;; (print "Processing queue of length " qlen)) -;;; (let loop ((count 0) -;;; (responses '())) -;;; (let ((delta (- (current-milliseconds) start-time))) -;;; (if (or (queue-empty? q) -;;; (> delta 400)) ;; stop working on this queue after 400ms have passed -;;; (list count delta responses) ;; return count, delta and responses list -;;; (let* ((witem (queue-remove! q)) -;;; (action (witem-action witem)) -;;; (rdat (witem-rdat witem)) -;;; (stmt (calldat-obj rdat)) -;;; (sth (full-get-sth acfg fname stmt)) -;;; (ctype (calldat-ctype rdat)) -;;; (data (witem-data witem)) -;;; (cookie (witem-cookie witem))) -;;; ;; do the processing and save the result in witem-result -;;; (witem-result-set! -;;; witem -;;; (case ctype ;; action -;;; ((noblockwrite) ;; blind write, no ack of success returned -;;; (apply sqlite3:execute sth data) -;;; (sqlite3:last-insert-rowid dbh)) -;;; ((dbwrite) ;; blocking write -;;; (apply sqlite3:execute sth data) -;;; #t) -;;; ((dbread) ;; TODO: consider breaking this up and shipping in pieces for large query -;;; (apply sqlite3:map-row (lambda x x) sth data)) -;;; ((full-ping) 'full-ping) -;;; (else (print "Not ready for action " action) #f))) -;;; (loop (add1 count) -;;; (if cookie -;;; (cons witem responses) -;;; responses)))))))) -;;; -;;; ;; do up to 400ms of processing on each queue -;;; ;; - the work-queue-processor will allow the max 1200ms of work to complete but it will flag as overloaded -;;; ;; -;;; (define (process-db-queries acfg fname) -;;; (if (hash-table-exists? (area-wqueues acfg) fname) -;;; (let* ((process-db-queries-start-time (current-milliseconds)) -;;; (qdat (hash-table-ref/default (area-wqueues acfg) fname #f)) -;;; (queue-sym->queue (lambda (queue-sym) -;;; (case queue-sym ;; lookup the queue from qdat given a name (symbol) -;;; ((wqueue) (qdat-writeq qdat)) -;;; ((rqueue) (qdat-readq qdat)) -;;; ((rwqueue) (qdat-rwq qdat)) -;;; ((misc) (qdat-misc qdat)) -;;; (else #f)))) -;;; (dbdat (get-dbh acfg fname)) -;;; (dbh (if (dbdat? dbdat)(dbdat-dbh dbdat) #f)) -;;; (nowtime (current-seconds))) -;;; ;; handle the queues that require a transaction -;;; ;; -;;; (map ;; -;;; (lambda (queue-sym) -;;; ;; (print "processing queue " queue-sym) -;;; (let* ((queue (queue-sym->queue queue-sym))) -;;; (if (not (queue-empty? queue)) -;;; (let ((responses -;;; (sqlite3:with-transaction ;; todo - catch exceptions... -;;; dbh -;;; (lambda () -;;; (let* ((res (doqueue acfg queue fname dbdat dbh))) ;; this does the work! -;;; ;; (print "res=" res) -;;; (match res -;;; ((count delta responses) -;;; (update-stats acfg fname queue-sym delta count) -;;; (sdbg> "process-db-queries" "sqlite3-transaction" process-db-queries-start-time #f #f) -;;; responses) ;; return responses -;;; (else -;;; (print "ERROR: bad return data from doqueue " res))) -;;; ))))) -;;; ;; having completed the transaction, send the responses. -;;; ;; (print "INFO: sending " (length responses) " responses.") -;;; (let loop ((responses-left responses)) -;;; (cond -;;; ((null? responses-left) #t) -;;; (else -;;; (let* ((witem (car responses-left)) -;;; (response (cdr responses-left))) -;;; (call-deliver-response acfg (witem-ripaddr witem)(witem-rport witem) -;;; (witem-cookie witem)(witem-result witem))) -;;; (loop (cdr responses-left)))))) -;;; ))) -;;; '(wqueue rwqueue rqueue)) -;;; -;;; ;; handle misc queue -;;; ;; -;;; ;; (print "processing misc queue") -;;; (let ((queue (queue-sym->queue 'misc))) -;;; (doqueue acfg queue fname dbdat dbh)) -;;; ;; .... -;;; (save-dbh acfg fname dbdat) -;;; #t ;; just to let the tests know we got here -;;; ) -;;; #f ;; nothing processed -;;; )) -;;; -;;; ;; run all queues in parallel per db but sequentially per queue for that db. -;;; ;; - process the queues every 500 or so ms -;;; ;; - allow for long running queries to continue but all other activities for that -;;; ;; db will be blocked. -;;; ;; -;;; (define (work-queue-processor acfg) -;;; (let* ((threads (make-hash-table))) ;; fname => thread -;;; (let loop ((fnames (hash-table-keys (area-wqueues acfg))) -;;; (target-time (+ (current-milliseconds) 50))) -;;; ;;(if (not (null? fnames))(print "Processing for these databases: " fnames)) -;;; (for-each -;;; (lambda (fname) -;;; ;; (print "processing for " fname) -;;; ;;(process-db-queries acfg fname)) -;;; (let ((th (hash-table-ref/default threads fname #f))) -;;; (if (and th (not (member (thread-state th) '(dead terminated)))) -;;; (begin -;;; (print "WARNING: worker thread for " fname " is taking a long time.") -;;; (print "Thread is in state " (thread-state th))) -;;; (let ((th1 (make-thread (lambda () -;;; ;; (catch-and-dump -;;; ;; (lambda () -;;; ;; (print "Process queries for " fname) -;;; (let ((start-time (current-milliseconds))) -;;; (process-db-queries acfg fname) -;;; ;; (thread-sleep! 0.01) ;; need the thread to take at least some time -;;; (hash-table-delete! threads fname)) ;; no mutexes? -;;; fname) -;;; "th1"))) ;; )) -;;; (hash-table-set! threads fname th1) -;;; (thread-start! th1))))) -;;; fnames) -;;; ;; (thread-sleep! 0.1) ;; give the threads some time to process requests -;;; ;; burn time until 400ms is up -;;; (let ((now-time (current-milliseconds))) -;;; (if (< now-time target-time) -;;; (let ((delta (- target-time now-time))) -;;; (thread-sleep! (/ delta 1000))))) -;;; (loop (hash-table-keys (area-wqueues acfg)) -;;; (+ (current-milliseconds) 50))))) -;;; -;;; ;;====================================================================== -;;; ;; S T A T S G A T H E R I N G -;;; ;;====================================================================== -;;; -;;; (defstruct stat -;;; (qcount-avg 0) ;; coarse running average -;;; (qtime-avg 0) ;; coarse running average -;;; (qcount 0) ;; total -;;; (qtime 0) ;; total -;;; (last-qcount 0) ;; last -;;; (last-qtime 0) ;; last -;;; (dbs '()) ;; list of db files handled by this node -;;; (when 0)) ;; when the last query happened - seconds -;;; -;;; -;;; (define (update-stats acfg fname bucket duration numqueries) -;;; (let* ((key fname) ;; for now do not use bucket. Was: (conc fname "-" bucket)) ;; lazy but good enough -;;; (stats (or (hash-table-ref/default (area-stats acfg) key #f) -;;; (let ((newstats (make-stat))) -;;; (hash-table-set! (area-stats acfg) key newstats) -;;; newstats)))) -;;; ;; when the last query happended (used to remove the fname from the active list) -;;; (stat-when-set! stats (current-seconds)) -;;; ;; last values -;;; (stat-last-qcount-set! stats numqueries) -;;; (stat-last-qtime-set! stats duration) -;;; ;; total over process lifetime -;;; (stat-qcount-set! stats (+ (stat-qcount stats) numqueries)) -;;; (stat-qtime-set! stats (+ (stat-qtime stats) duration)) -;;; ;; coarse average -;;; (stat-qcount-avg-set! stats (/ (+ (stat-qcount-avg stats) numqueries) 2)) -;;; (stat-qtime-avg-set! stats (/ (+ (stat-qtime-avg stats) duration) 2)) -;;; -;;; ;; here is where we add the stats for a given dbfile -;;; (if (not (member fname (stat-dbs stats))) -;;; (stat-dbs-set! stats (cons fname (stat-dbs stats)))) -;;; -;;; )) -;;; -;;; ;;====================================================================== -;;; ;; S E R V E R S T U F F -;;; ;;====================================================================== -;;; -;;; ;; this does NOT return! -;;; ;; -;;; (define (find-free-port-and-open acfg) -;;; (let ((port (or (area-port acfg) 3200))) -;;; (handle-exceptions -;;; exn -;;; (begin -;;; (print "INFO: cannot bind to port " (rpc:default-server-port) ", trying next port") -;;; (area-port-set! acfg (+ port 1)) -;;; (find-free-port-and-open acfg)) -;;; (rpc:default-server-port port) -;;; (area-port-set! acfg port) -;;; (tcp-read-timeout 120000) -;;; ;; ((rpc:make-server (tcp-listen port)) #t) -;;; (tcp-listen (rpc:default-server-port) -;;; )))) -;;; -;;; ;; register this node by putting a packet into the pkts dir. -;;; ;; look for other servers -;;; ;; contact other servers and compile list of servers -;;; ;; there are two types of server -;;; ;; main servers - dashboards, runners and dedicated servers - need pkt -;;; ;; passive servers - test executers, step calls, list-runs - no pkt -;;; ;; -;;; (define (register-node acfg hostip port-num) -;;; ;;(mutex-lock! (area-mutex acfg)) -;;; (let* ((server-type (area-server-type acfg)) ;; auto, main, passive (no pkt created) -;;; (best-ip (or hostip (get-my-best-address))) -;;; (mtdir (area-dbdir acfg)) -;;; (pktdir (area-pktsdir acfg))) ;; conc mtdir "/.server-pkts"))) -;;; (print "Registering node " best-ip ":" port-num) -;;; (if (not mtdir) ;; require a home for this node to put or find databases -;;; #f -;;; (begin -;;; (if (not (directory? pktdir))(create-directory pktdir)) -;;; ;; server is started, now create pkt if needed -;;; (print "Starting server in " server-type " mode with port " port-num) -;;; (if (member server-type '(auto main)) ;; TODO: if auto, count number of servers registers, if > 3 then don't put out a pkt -;;; (begin -;;; (area-pktid-set! acfg -;;; (write-alist->pkt -;;; pktdir -;;; `((hostname . ,(get-host-name)) -;;; (ipaddr . ,best-ip) -;;; (port . ,port-num) -;;; (pid . ,(current-process-id))) -;;; pktspec: *pktspec* -;;; ptype: 'server)) -;;; (area-pktfile-set! acfg (conc pktdir "/" (area-pktid acfg) ".pkt")))) -;;; (area-port-set! acfg port-num) -;;; #;(mutex-unlock! (area-mutex acfg)))))) -;;; -;;; (define *cookie-seqnum* 0) -;;; (define (make-cookie key) -;;; (set! *cookie-seqnum* (add1 *cookie-seqnum*)) -;;; ;;(print "MAKE COOKIE CALLED -- on "servkey"-"*cookie-seqnum*) -;;; (conc key "-" *cookie-seqnum*) -;;; ) -;;; -;;; ;; dispatch locally if possible -;;; ;; -;;; (define (call-deliver-response acfg ipaddr port cookie data) -;;; (if (and (equal? (area-myaddr acfg) ipaddr) -;;; (equal? (area-port acfg) port)) -;;; (deliver-response acfg cookie data) -;;; ((rpc:procedure 'response ipaddr port) cookie data))) -;;; -;;; (define (deliver-response acfg cookie data) -;;; (let ((deliver-response-start (current-milliseconds))) -;;; (thread-start! (make-thread -;;; (lambda () -;;; (let loop ((tries-left 5)) -;;; ;;(print "TOP OF DELIVER_RESPONSE LOOP; triesleft="tries-left) -;;; ;;(pp (hash-table->alist (area-cookie2mbox acfg))) -;;; (let* ((mbox (hash-table-ref/default (area-cookie2mbox acfg) cookie #f))) -;;; (cond -;;; ((eq? 0 tries-left) -;;; (print "ulex:deliver-response: I give up. Mailbox never appeared. cookie="cookie) -;;; ) -;;; (mbox -;;; ;;(print "got mbox="mbox" got data="data" send.") -;;; (mailbox-send! mbox data)) -;;; (else -;;; ;;(print "no mbox yet. look for "cookie) -;;; (thread-sleep! (/ (- 6 tries-left) 10)) -;;; (loop (sub1 tries-left)))))) -;;; ;; (debug-pp (list (conc "ulex:deliver-response took " (- (current-milliseconds) deliver-response-start) " ms, cookie=" cookie " data=") data)) -;;; (sdbg> "deliver-response" "mailbox-send" deliver-response-start #f #f cookie) -;;; ) -;;; (conc "deliver-response thread for cookie="cookie)))) -;;; #t) -;;; -;;; ;; action: -;;; ;; immediate - quick actions, no need to put in queues -;;; ;; dbwrite - put in dbwrite queue -;;; ;; dbread - put in dbread queue -;;; ;; oslong - os actions, e.g. du, that could take a long time -;;; ;; osshort - os actions that should be quick, e.g. df -;;; ;; -;;; (define (request acfg from-ipaddr from-port servkey action cookie fname params) ;; std-peer-handler -;;; ;; NOTE: Use rpc:current-peer for getting return address -;;; (let* ((std-peer-handler-start (current-milliseconds)) -;;; ;; (raw-data (alist-ref 'data dat)) -;;; (rdat (hash-table-ref/default -;;; (area-rtable acfg) action #f)) ;; this looks up the sql query or other details indexed by the action -;;; (witem (make-witem ripaddr: from-ipaddr ;; rhost: from-host -;;; rport: from-port action: action -;;; rdat: rdat cookie: cookie -;;; servkey: servkey data: params ;; TODO - rename data to params -;;; caller: (rpc:current-peer)))) -;;; (if (not (equal? servkey (area-pktid acfg))) -;;; `(#f . ,(conc "I don't know you servkey=" servkey ", pktid=" (area-pktid acfg))) ;; immediately return this -;;; (let* ((ctype (if rdat -;;; (calldat-ctype rdat) ;; is this necessary? these should be identical -;;; action))) -;;; (sdbg> "std-peer-handler" "immediate" std-peer-handler-start #f #f) -;;; (case ctype -;;; ;; (dbwrite acfg rdat (cons from-ipaddr from-port) data))) -;;; ((full-ping) `(#t "ack to full ping" ,(work-queue-add acfg fname witem) ,cookie)) -;;; ((response) `(#t "ack from requestor" ,(deliver-response acfg fname params))) -;;; ((dbwrite) `(#t "db write submitted" ,(work-queue-add acfg fname witem) ,cookie)) -;;; ((dbread) `(#t "db read submitted" ,(work-queue-add acfg fname witem) ,cookie )) -;;; ((dbrw) `(#t "db read/write submitted" ,cookie)) -;;; ((osshort) `(#t "os short submitted" ,cookie)) -;;; ((oslong) `(#t "os long submitted" ,cookie)) -;;; (else `(#f "unrecognised action" ,ctype))))))) -;;; -;;; ;; Call this to start the actual server -;;; ;; -;;; ;; start_server -;;; ;; -;;; ;; mode: ' -;;; ;; handler: proc which takes pktrecieved as argument -;;; ;; -;;; -;;; (define (start-server acfg) -;;; (let* ((conn (find-free-port-and-open acfg)) -;;; (port (area-port acfg))) -;;; (rpc:publish-procedure! -;;; 'delist-db -;;; (lambda (fname) -;;; (hash-table-delete! (area-dbs acfg) fname))) -;;; (rpc:publish-procedure! -;;; 'calling-addr -;;; (lambda () -;;; (rpc:current-peer))) -;;; (rpc:publish-procedure! -;;; 'ping -;;; (lambda ()(real-ping acfg))) -;;; (rpc:publish-procedure! -;;; 'request -;;; (lambda (from-addr from-port servkey action cookie dbname params) -;;; (request acfg from-addr from-port servkey action cookie dbname params))) -;;; (rpc:publish-procedure! -;;; 'response -;;; (lambda (cookie res-dat) -;;; (deliver-response acfg cookie res-dat))) -;;; (area-ready-set! acfg #t) -;;; (area-conn-set! acfg conn) -;;; ((rpc:make-server conn) #f)));; ((tcp-listen (rpc:default-server-port)) #t) -;;; -;;; -;;; (define (launch acfg) ;; #!optional (proc std-peer-handler)) -;;; (print "starting launch") -;;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) -;;; #;(let ((original-handler (current-exception-handler))) ;; is th -;;; (lambda (exception) -;;; (server-exit-procedure) -;;; (original-handler exception))) -;;; (on-exit (lambda () -;;; (shutdown acfg))) ;; (finalize-all-db-handles acfg))) -;;; ;; set up the rpc handler -;;; (let* ((th1 (make-thread -;;; (lambda ()(start-server acfg)) -;;; "server thread")) -;;; (th2 (make-thread -;;; (lambda () -;;; (print "th2 starting") -;;; (let loop () -;;; (work-queue-processor acfg) -;;; (print "work-queue-processor crashed!") -;;; (loop))) -;;; "work queue thread"))) -;;; (thread-start! th1) -;;; (thread-start! th2) -;;; (let loop () -;;; (thread-sleep! 0.025) -;;; (if (area-ready acfg) -;;; #t -;;; (loop))) -;;; ;; attempt to fix my address -;;; (let* ((all-addr (get-all-ips-sorted))) ;; could use (tcp-addresses conn)? -;;; (let loop ((rem-addrs all-addr)) -;;; (if (null? rem-addrs) -;;; (begin -;;; (print "ERROR: Failed to figure out the ip address of myself as a server. Giving up.") -;;; (exit 1)) ;; BUG Changeme to raising an exception -;;; -;;; (let* ((addr (car rem-addrs)) -;;; (good-addr (handle-exceptions -;;; exn -;;; #f -;;; ((rpc:procedure 'calling-addr addr (area-port acfg)))))) -;;; (if good-addr -;;; (begin -;;; (print "Got good-addr of " good-addr) -;;; (area-myaddr-set! acfg good-addr)) -;;; (loop (cdr rem-addrs))))))) -;;; (register-node acfg (area-myaddr acfg)(area-port acfg)) -;;; (print "INFO: Server started on " (area-myaddr acfg) ":" (area-port acfg)) -;;; ;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) -;;; )) -;;; -;;; (define (clear-server-pkt acfg) -;;; (let ((pktf (area-pktfile acfg))) -;;; (if pktf (delete-file* pktf)))) -;;; -;;; (define (shutdown acfg) -;;; (let (;;(conn (area-conn acfg)) -;;; (pktf (area-pktfile acfg)) -;;; (port (area-port acfg))) -;;; (if pktf (delete-file* pktf)) -;;; (send-all "imshuttingdown") -;;; ;; (rpc:close-all-connections!) ;; don't know if this is actually needed -;;; (finalize-all-db-handles acfg))) -;;; -;;; (define (send-all msg) -;;; #f) -;;; -;;; ;; given a area record look up all the packets -;;; ;; -;;; (define (get-all-server-pkts acfg) -;;; (let ((all-pkt-files (glob (conc (area-pktsdir acfg) "/*.pkt")))) -;;; (map (lambda (pkt-file) -;;; (read-pkt->alist pkt-file pktspec: *pktspec*)) -;;; all-pkt-files))) -;;; -;;; #;((Z . "9a0212302295a19610d5796fce0370fa130758e9") -;;; (port . "34827") -;;; (pid . "28748") -;;; (hostname . "zeus") -;;; (T . "server") -;;; (D . "1549427032.0")) -;;; -;;; #;(define (get-my-best-address) -;;; (let ((all-my-addresses (get-all-ips))) ;; (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))) -;;; (cond -;;; ((null? all-my-addresses) -;;; (get-host-name)) ;; no interfaces? -;;; ((eq? (length all-my-addresses) 1) -;;; (ip->string (car all-my-addresses))) ;; only one to choose from, just go with it -;;; (else -;;; (ip->string (car (filter (lambda (x) ;; take any but 127. -;;; (not (eq? (u8vector-ref x 0) 127))) -;;; all-my-addresses))))))) -;;; -;;; ;; whoami? I am my pkt -;;; ;; -;;; (define (whoami? acfg) -;;; (hash-table-ref/default (area-hosts acfg)(area-pktid acfg) #f)) -;;; -;;; ;;====================================================================== -;;; ;; "Client side" operations -;;; ;;====================================================================== -;;; -;;; (define (safe-call call-key host port . params) -;;; (handle-exceptions -;;; exn -;;; (begin -;;; (print "Call " call-key " to " host ":" port " failed") -;;; #f) -;;; (apply (rpc:procedure call-key host port) params))) -;;; -;;; ;; ;; convert to/from string / sexpr -;;; ;; -;;; ;; (define (string->sexpr str) -;;; ;; (if (string? str) -;;; ;; (with-input-from-string str read) -;;; ;; str)) -;;; ;; -;;; ;; (define (sexpr->string s) -;;; ;; (with-output-to-string (lambda ()(write s)))) -;;; -;;; ;; is the server alive? -;;; ;; -;;; (define (ping acfg host port) -;;; (let* ((myaddr (area-myaddr acfg)) -;;; (myport (area-port acfg)) -;;; (start-time (current-milliseconds)) -;;; (res (if (and (equal? myaddr host) -;;; (equal? myport port)) -;;; (real-ping acfg) -;;; ((rpc:procedure 'ping host port))))) -;;; (cons (- (current-milliseconds) start-time) -;;; res))) -;;; -;;; ;; returns ( ipaddr port alist-fname=>randnum ) -;;; (define (real-ping acfg) -;;; `(,(area-myaddr acfg) ,(area-port acfg) ,(get-host-stats acfg))) -;;; -;;; ;; is the server alive AND the queues processing? -;;; ;; -;;; #;(define (full-ping acfg servpkt) -;;; (let* ((start-time (current-milliseconds)) -;;; (res (send-message acfg servpkt '(full-ping) 'full-ping))) -;;; (cons (- (current-milliseconds) start-time) -;;; res))) ;; (equal? res "got ping")))) -;;; -;;; -;;; ;; look up all pkts and get the server id (the hash), port, host/ip -;;; ;; store this info in acfg -;;; ;; return the number of responsive servers found -;;; ;; -;;; ;; DO NOT VERIFY THAT THE SERVER IS ALIVE HERE. This is called at times where the current server is not yet alive and cannot ping itself -;;; ;; -;;; (define (update-known-servers acfg) -;;; ;; readll all pkts -;;; ;; foreach pkt; if it isn't me ping the server; if alive, add to hosts hash, else rm the pkt -;;; (let* ((start-time (current-milliseconds)) -;;; (all-pkts (delete-duplicates -;;; (append (get-all-server-pkts acfg) -;;; (hash-table-values (area-hosts acfg))))) -;;; (hostshash (area-hosts acfg)) -;;; (my-id (area-pktid acfg)) -;;; (pktsdir (area-pktsdir acfg)) ;; needed to remove pkts from non-responsive servers -;;; (numsrvs 0) -;;; (delpkt (lambda (pktsdir sid) -;;; (print "clearing out server " sid) -;;; (delete-file* (conc pktsdir "/" sid ".pkt")) -;;; (hash-table-delete! hostshash sid)))) -;;; (area-last-srvup-set! acfg (current-seconds)) -;;; (for-each -;;; (lambda (servpkt) -;;; (if (list? servpkt) -;;; ;; (pp servpkt) -;;; (let* ((shost (alist-ref 'ipaddr servpkt)) -;;; (sport (any->number (alist-ref 'port servpkt))) -;;; (res (handle-exceptions -;;; exn -;;; (begin -;;; ;; (print "INFO: bad server on " shost ":" sport) -;;; #f) -;;; (ping acfg shost sport))) -;;; (sid (alist-ref 'Z servpkt)) ;; Z code is our name for the server -;;; (url (conc shost ":" sport)) -;;; ) -;;; #;(if (or (not res) -;;; (null? res)) -;;; (begin -;;; (print "STRANGE: ping of " url " gave " res))) -;;; -;;; ;; (print "Got " res " from " shost ":" sport) -;;; (match res -;;; ((qduration . payload) -;;; ;; (print "Server pkt:" (alist-ref 'ipaddr servpkt) ":" (alist-ref 'port servpkt) -;;; ;; (if payload -;;; ;; "Success" "Fail")) -;;; (match payload -;;; ((host port stats) -;;; ;; (print "From " host ":" port " got stats: " stats) -;;; (if (and host port stats) -;;; (let ((url (conc host ":" port))) -;;; (hash-table-set! hostshash sid servpkt) -;;; ;; store based on host:port -;;; (hash-table-set! (area-hoststats acfg) sid stats)) -;;; (print "missing data from the server, not sure what that means!")) -;;; (set! numsrvs (+ numsrvs 1))) -;;; (#f -;;; (print "Removing pkt " sid " due to #f from server or failed ping") -;;; (delpkt pktsdir sid)) -;;; (else -;;; (print "Got ")(pp res)(print " from server ")(pp servpkt) " but response did not match (#f/#t . msg)"))) -;;; (else -;;; ;; here we delete the pkt - can't reach the server, remove it -;;; ;; however this logic is inadequate. we should mark the server as checked -;;; ;; and not good, if it happens a second time - then remove the pkt -;;; ;; or something similar. I.e. don't be too quick to assume the server is wedged or dead -;;; ;; could be it is simply too busy to reply -;;; (let ((bad-pings (hash-table-ref/default (area-health acfg) url 0))) -;;; (if (> bad-pings 1) ;; two bad pings - remove pkt -;;; (begin -;;; (print "INFO: " bad-pings " bad responses from " url ", deleting pkt " sid) -;;; (delpkt pktsdir sid)) -;;; (begin -;;; (print "INFO: " bad-pings " bad responses from " shost ":" sport " not deleting pkt yet") -;;; (hash-table-set! (area-health acfg) -;;; url -;;; (+ (hash-table-ref/default (area-health acfg) url 0) 1)) -;;; )) -;;; )))) -;;; ;; servpkt is not actually a pkt? -;;; (begin -;;; (print "Bad pkt " servpkt)))) -;;; all-pkts) -;;; (sdbg> "update-known-servers" "end" start-time #f #f " found " numsrvs -;;; " servers, pkts: " (map (lambda (p) -;;; (alist-ref 'Z p)) -;;; all-pkts)) -;;; numsrvs)) -;;; -;;; (defstruct srvstat -;;; (numfiles 0) ;; number of db files handled by this server - subtract 1 for the db being currently looked at -;;; (randnum #f) ;; tie breaker number assigned to by the server itself - applies only to the db under consideration -;;; (pkt #f)) ;; the server pkt -;;; -;;; ;;(define (srv->srvstat srvpkt) -;;; -;;; ;; Get the server best for given dbname and key -;;; ;; -;;; ;; NOTE: key is not currently used. The key points to the kind of query, this may be useful for directing read-only queries. -;;; ;; -;;; (define (get-best-server acfg dbname key) -;;; (let* (;; (servers (hash-table-values (area-hosts acfg))) -;;; (servers (area-hosts acfg)) -;;; (skeys (sort (hash-table-keys servers) string>=?)) ;; a stable listing -;;; (start-time (current-milliseconds)) -;;; (srvstats (make-hash-table)) ;; srvid => srvstat -;;; (url (conc (area-myaddr acfg) ":" (area-port acfg)))) -;;; ;; (print "scores for " dbname ": " (map (lambda (k)(cons k (calc-server-score acfg dbname k))) skeys)) -;;; (if (null? skeys) -;;; (if (> (update-known-servers acfg) 0) -;;; (get-best-server acfg dbname key) ;; some risk of infinite loop here, TODO add try counter -;;; (begin -;;; (print "ERROR: no server found!") ;; since this process is also a server this should never happen -;;; #f)) -;;; (begin -;;; ;; (print "in get-best-server with skeys=" skeys) -;;; (if (> (- (current-seconds) (area-last-srvup acfg)) 10) -;;; (begin -;;; (update-known-servers acfg) -;;; (sdbg> "get-best-server" "update-known-servers" start-time #f #f))) -;;; -;;; ;; for each server look at the list of dbfiles, total number of dbs being handled -;;; ;; and the rand number, save the best host -;;; ;; also do a delist-db for each server dbfile not used -;;; (let* ((best-server #f) -;;; (servers-to-delist (make-hash-table))) -;;; (for-each -;;; (lambda (srvid) -;;; (let* ((server (hash-table-ref/default servers srvid #f)) -;;; (stats (hash-table-ref/default (area-hoststats acfg) srvid '(())))) -;;; ;; (print "stats: " stats) -;;; (if server -;;; (let* ((dbweights (car stats)) -;;; (srvload (length (filter (lambda (x)(not (equal? dbname (car x)))) dbweights))) -;;; (dbrec (alist-ref dbname dbweights equal?)) ;; get the pair with fname . randscore -;;; (randnum (if dbrec -;;; dbrec ;; (cdr dbrec) -;;; 0))) -;;; (hash-table-set! srvstats srvid (make-srvstat numfiles: srvload randnum: randnum pkt: server)))))) -;;; skeys) -;;; -;;; (let* ((sorted (sort (hash-table-values srvstats) -;;; (lambda (a b) -;;; (let ((numfiles-a (srvstat-numfiles a)) -;;; (numfiles-b (srvstat-numfiles b)) -;;; (randnum-a (srvstat-randnum a)) -;;; (randnum-b (srvstat-randnum b))) -;;; (if (< numfiles-a numfiles-b) ;; Note, I don't think adding an offset works here. Goal was only move file handling to a different server if it has 2 less -;;; #t -;;; (if (and (equal? numfiles-a numfiles-b) -;;; (< randnum-a randnum-b)) -;;; #t -;;; #f)))))) -;;; (best (if (null? sorted) -;;; (begin -;;; (print "ERROR: should never be null due to self as server.") -;;; #f) -;;; (srvstat-pkt (car sorted))))) -;;; #;(print "SERVER(" url "): " dbname ": " (map (lambda (srv) -;;; (let ((p (srvstat-pkt srv))) -;;; (conc (alist-ref 'ipaddr p) ":" (alist-ref 'port p) -;;; "(" (srvstat-numfiles srv)","(srvstat-randnum srv)")"))) -;;; sorted)) -;;; best)))))) -;;; -;;; ;; send out an "I'm about to exit notice to all known servers" -;;; ;; -;;; (define (death-imminent acfg) -;;; '()) -;;; -;;; ;;====================================================================== -;;; ;; U L E X - T H E I N T E R E S T I N G S T U F F ! ! -;;; ;;====================================================================== -;;; -;;; ;; register a handler -;;; ;; NOTES: -;;; ;; dbinitsql is reserved for a list of sql statements for initializing the db -;;; ;; dbinitfn is reserved for a db init function, if exists called after dbinitsql -;;; ;; -;;; (define (register acfg key obj #!optional (ctype 'dbwrite)) -;;; (let ((ht (area-rtable acfg))) -;;; (if (hash-table-exists? ht key) -;;; (print "WARNING: redefinition of entry " key)) -;;; (hash-table-set! ht key (make-calldat obj: obj ctype: ctype)))) -;;; -;;; ;; usage: register-batch acfg '((key1 . sql1) (key2 . sql2) ... ) -;;; ;; NB// obj is often an sql query -;;; ;; -;;; (define (register-batch acfg ctype data) -;;; (let ((ht (area-rtable acfg))) -;;; (map (lambda (dat) -;;; (hash-table-set! ht (car dat)(make-calldat obj: (cdr dat) ctype: ctype))) -;;; data))) -;;; -;;; (define (initialize-area-calls-from-specfile area specfile) -;;; (let* ((callspec (with-input-from-file specfile read ))) -;;; (for-each (lambda (group) -;;; (register-batch -;;; area -;;; (car group) -;;; (cdr group))) -;;; callspec))) -;;; -;;; ;; get-rentry -;;; ;; -;;; (define (get-rentry acfg key) -;;; (hash-table-ref/default (area-rtable acfg) key #f)) -;;; -;;; (define (get-rsql acfg key) -;;; (let ((cdat (get-rentry acfg key))) -;;; (if cdat -;;; (calldat-obj cdat) -;;; #f))) -;;; -;;; -;;; -;;; ;; blocking call: -;;; ;; client server -;;; ;; ------ ------ -;;; ;; call() -;;; ;; send-message() -;;; ;; nmsg-send() -;;; ;; nmsg-receive() -;;; ;; nmsg-respond(ack,cookie) -;;; ;; ack, cookie -;;; ;; mbox-thread-wait(cookie) -;;; ;; nmsg-send(client,cookie,result) -;;; ;; nmsg-respond(ack) -;;; ;; return result -;;; ;; -;;; ;; reserved action: -;;; ;; 'immediate -;;; ;; 'dbinitsql -;;; ;; -;;; (define (call acfg dbname action params #!optional (count 0)) -;;; (let* ((call-start-time (current-milliseconds)) -;;; (srv (get-best-server acfg dbname action)) -;;; (post-get-start-time (current-milliseconds)) -;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f)) -;;; (myid (trim-pktid (area-pktid acfg))) -;;; (srvid (trim-pktid (alist-ref 'Z srv))) -;;; (cookie (make-cookie myid))) -;;; (sdbg> "call" "get-best-server" call-start-time #f call-start-time " from: " myid " to server: " srvid " for " dbname " action: " action " params: " params " rdat: " rdat) -;;; (print "INFO: call to " (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv) " from " (area-myaddr acfg) ":" (area-port acfg) " for " dbname) -;;; (if (and srv rdat) ;; need both to dispatch a request -;;; (let* ((ripaddr (alist-ref 'ipaddr srv)) -;;; (rsrvid (alist-ref 'Z srv)) -;;; (rport (any->number (alist-ref 'port srv))) -;;; (res-full (if (and (equal? ripaddr (area-myaddr acfg)) -;;; (equal? rport (area-port acfg))) -;;; (request acfg ripaddr rport (area-pktid acfg) action cookie dbname params) -;;; (safe-call 'request ripaddr rport -;;; (area-myaddr acfg) -;;; (area-port acfg) -;;; #;(area-pktid acfg) -;;; rsrvid -;;; action cookie dbname params)))) -;;; ;; (print "res-full: " res-full) -;;; (match res-full -;;; ((response-ok response-msg rem ...) -;;; (let* ((send-message-time (current-milliseconds)) -;;; ;; (match res-full -;;; ;; ((response-ok response-msg) -;;; ;; (response-ok (car res-full)) -;;; ;; (response-msg (cadr res-full) -;;; ) -;;; ;; (res (take res-full 3))) ;; ctype == action, TODO: converge on one term <<=== what was this? BUG -;;; ;; (print "ulex:call: send-message took " (- send-message-time post-get-start-time) " ms params=" params) -;;; (sdbg> "call" "send-message" post-get-start-time #f call-start-time) -;;; (cond -;;; ((not response-ok) #f) -;;; ((member response-msg '("db read submitted" "db write submitted")) -;;; (let* ((cookie-id (cadddr res-full)) -;;; (mbox (make-mailbox)) -;;; (mbox-time (current-milliseconds))) -;;; (hash-table-set! (area-cookie2mbox acfg) cookie-id mbox) -;;; (let* ((mbox-timeout-secs 20) -;;; (mbox-timeout-result 'MBOX_TIMEOUT) -;;; (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) -;;; (mbox-receive-time (current-milliseconds))) -;;; (hash-table-delete! (area-cookie2mbox acfg) cookie-id) -;;; (sdbg> "call" "mailbox-receive" mbox-time #f call-start-time " from: " myid " to server: " srvid " for " dbname) -;;; ;; (print "ulex:call mailbox-receive took " (- mbox-receive-time mbox-time) "ms params=" params) -;;; res))) -;;; (else -;;; (print "Unhandled response \""response-msg"\"") -;;; #f)) -;;; ;; depending on what action (i.e. ctype) is we will block here waiting for -;;; ;; all the data (mechanism to be determined) -;;; ;; -;;; ;; if res is a "working on it" then wait -;;; ;; wait for result -;;; ;; mailbox thread wait on -;;; -;;; ;; if res is a "can't help you" then try a different server -;;; ;; if res is a "ack" (e.g. for one-shot requests) then return res -;;; )) -;;; (else -;;; (if (< count 10) -;;; (let* ((url (conc (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv)))) -;;; (thread-sleep! 1) -;;; (print "ERROR: Bad result from " url ", dbname: " dbname ", action: " action ", params: " params ". Trying again in 1 second.") -;;; (call acfg dbname action params (+ count 1))) -;;; (begin -;;; (error (conc "ERROR: " count " tries, still have improper response res-full=" res-full))))))) -;;; (begin -;;; (if (not rdat) -;;; (print "ERROR: action " action " not registered.") -;;; (if (< count 10) -;;; (begin -;;; (thread-sleep! 1) -;;; (area-hosts-set! acfg (make-hash-table)) ;; clear out all known hosts -;;; (print "ERROR: no server found, srv=" srv ", trying again in 1 seconds") -;;; (call acfg dbname action params (+ count 1))) -;;; (begin -;;; (error (conc "ERROR: no server found after 10 tries, srv=" srv ", giving up.")) -;;; #;(error "No server available")))))))) -;;; -;;; -;;; ;;====================================================================== -;;; ;; U T I L I T I E S -;;; ;;====================================================================== -;;; -;;; ;; get a signature for identifing this process -;;; ;; -;;; (define (get-process-signature) -;;; (cons (get-host-name)(current-process-id))) -;;; -;;; ;;====================================================================== -;;; ;; S Y S T E M S T U F F -;;; ;;====================================================================== -;;; -;;; ;; get normalized cpu load by reading from /proc/loadavg and -;;; ;; /proc/cpuinfo return all three values and the number of real cpus -;;; ;; and the number of threads returns alist '((adj-cpu-load -;;; ;; . normalized-proc-load) ... etc. keys: adj-proc-load, -;;; ;; adj-core-load, 1m-load, 5m-load, 15m-load -;;; ;; -;;; (define (get-normalized-cpu-load) -;;; (let ((res (get-normalized-cpu-load-raw)) -;;; (default `((adj-proc-load . 2) ;; there is no right answer -;;; (adj-core-load . 2) -;;; (1m-load . 2) -;;; (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong -;;; (15m-load . 0) -;;; (proc . 1) -;;; (core . 1) -;;; (phys . 1) -;;; (error . #t)))) -;;; (cond -;;; ((and (list? res) -;;; (> (length res) 2)) -;;; res) -;;; ((eq? res #f) default) ;; add messages? -;;; ((eq? res #f) default) ;; this would be the #eof -;;; (else default)))) -;;; -;;; (define (get-normalized-cpu-load-raw) -;;; (let* ((actual-host (get-host-name))) ;; #f is localhost -;;; (let ((data (append -;;; (with-input-from-file "/proc/loadavg" read-lines) -;;; (with-input-from-file "/proc/cpuinfo" read-lines) -;;; (list "end"))) -;;; (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) -;;; (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) -;;; (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) -;;; (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) -;;; (max-num (lambda (p n)(max (string->number p) n)))) -;;; ;; (print "data=" data) -;;; (if (null? data) ;; something went wrong -;;; #f -;;; (let loop ((hed (car data)) -;;; (tal (cdr data)) -;;; (loads #f) -;;; (proc-num 0) ;; processor includes threads -;;; (phys-num 0) ;; physical chip on motherboard -;;; (core-num 0)) ;; core -;;; ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) -;;; (if (null? tal) ;; have all our data, calculate normalized load and return result -;;; (let* ((act-proc (+ proc-num 1)) -;;; (act-phys (+ phys-num 1)) -;;; (act-core (+ core-num 1)) -;;; (adj-proc-load (/ (car loads) act-proc)) -;;; (adj-core-load (/ (car loads) act-core)) -;;; (result -;;; (append (list (cons 'adj-proc-load adj-proc-load) -;;; (cons 'adj-core-load adj-core-load)) -;;; (list (cons '1m-load (car loads)) -;;; (cons '5m-load (cadr loads)) -;;; (cons '15m-load (caddr loads))) -;;; (list (cons 'proc act-proc) -;;; (cons 'core act-core) -;;; (cons 'phys act-phys))))) -;;; result) -;;; (regex-case -;;; hed -;;; (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) -;;; (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) -;;; (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) -;;; (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) -;;; (else -;;; (begin -;;; ;; (print "NO MATCH: " hed) -;;; (loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))) -;;; -;;; (define (get-host-stats acfg) -;;; (let ((stats-hash (area-stats acfg))) -;;; ;; use this opportunity to remove references to dbfiles which have not been accessed in a while -;;; (for-each -;;; (lambda (dbname) -;;; (let* ((stats (hash-table-ref stats-hash dbname)) -;;; (last-access (stat-when stats))) -;;; (if (and (> last-access 0) ;; if zero then there has been no access -;;; (> (- (current-seconds) last-access) 10)) ;; not used in ten seconds -;;; (begin -;;; (print "Removing " dbname " from stats list") -;;; (hash-table-delete! stats-hash dbname) ;; remove from stats hash -;;; (stat-dbs-set! stats (hash-table-keys stats)))))) -;;; (hash-table-keys stats-hash)) -;;; -;;; `(,(hash-table->alist (area-dbs acfg)) ;; dbname => randnum -;;; ,(map (lambda (dbname) ;; dbname is the db name -;;; (cons dbname (stat-when (hash-table-ref stats-hash dbname)))) -;;; (hash-table-keys stats-hash)) -;;; (cpuload . ,(get-normalized-cpu-load))))) -;;; #;(stats . ,(map (lambda (k) ;; create an alist from the stats data -;;; (cons k (stat->alist (hash-table-ref (area-stats acfg) k)))) -;;; (hash-table-keys (area-stats acfg)))) -;;; -;;; #;(trace -;;; ;; assv -;;; ;; cdr -;;; ;; caar -;;; ;; ;; cdr -;;; ;; call -;;; ;; finalize-all-db-handles -;;; ;; get-all-server-pkts -;;; ;; get-normalized-cpu-load -;;; ;; get-normalized-cpu-load-raw -;;; ;; launch -;;; ;; nmsg-send -;;; ;; process-db-queries -;;; ;; receive-message -;;; ;; std-peer-handler -;;; ;; update-known-servers -;;; ;; work-queue-processor -;;; ) -;;; -;;; ;;====================================================================== -;;; ;; netutil -;;; ;; move this back to ulex-netutil.scm someday? -;;; ;;====================================================================== -;;; -;;; ;; #include -;;; ;; #include -;;; ;; #include -;;; ;; #include -;;; -;;; (foreign-declare "#include \"sys/types.h\"") -;;; (foreign-declare "#include \"sys/socket.h\"") -;;; (foreign-declare "#include \"ifaddrs.h\"") -;;; (foreign-declare "#include \"arpa/inet.h\"") -;;; -;;; ;; get IP addresses from ALL interfaces -;;; (define get-all-ips -;;; (foreign-safe-lambda* scheme-object () -;;; " -;;; -;;; // from https://stackoverflow.com/questions/17909401/linux-c-get-default-interfaces-ip-address : -;;; -;;; -;;; C_word lst = C_SCHEME_END_OF_LIST, len, str, *a; -;;; // struct ifaddrs *ifa, *i; -;;; // struct sockaddr *sa; -;;; -;;; struct ifaddrs * ifAddrStruct = NULL; -;;; struct ifaddrs * ifa = NULL; -;;; void * tmpAddrPtr = NULL; -;;; -;;; if ( getifaddrs(&ifAddrStruct) != 0) -;;; C_return(C_SCHEME_FALSE); -;;; -;;; // for (i = ifa; i != NULL; i = i->ifa_next) { -;;; for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) { -;;; if (ifa->ifa_addr->sa_family==AF_INET) { // Check it is -;;; // a valid IPv4 address -;;; tmpAddrPtr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; -;;; char addressBuffer[INET_ADDRSTRLEN]; -;;; inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN); -;;; // printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); -;;; len = strlen(addressBuffer); -;;; a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); -;;; str = C_string(&a, len, addressBuffer); -;;; lst = C_a_pair(&a, str, lst); -;;; } -;;; -;;; // else if (ifa->ifa_addr->sa_family==AF_INET6) { // Check it is -;;; // // a valid IPv6 address -;;; // tmpAddrPtr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; -;;; // char addressBuffer[INET6_ADDRSTRLEN]; -;;; // inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN); -;;; //// printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); -;;; // len = strlen(addressBuffer); -;;; // a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); -;;; // str = C_string(&a, len, addressBuffer); -;;; // lst = C_a_pair(&a, str, lst); -;;; // } -;;; -;;; // else { -;;; // printf(\" not an IPv4 address\\n\"); -;;; // } -;;; -;;; } -;;; -;;; freeifaddrs(ifa); -;;; C_return(lst); -;;; -;;; ")) -;;; -;;; ;; Change this to bias for addresses with a reasonable broadcast value? -;;; ;; -;;; (define (ip-pref-less? a b) -;;; (let* ((rate (lambda (ipstr) -;;; (regex-case ipstr -;;; ( "^127\\." _ 0 ) -;;; ( "^(10\\.0|192\\.168\\.)\\..*" _ 1 ) -;;; ( else 2 ) )))) -;;; (< (rate a) (rate b)))) -;;; -;;; -;;; (define (get-my-best-address) -;;; (let ((all-my-addresses (get-all-ips)) -;;; ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) -;;; ) -;;; (cond -;;; ((null? all-my-addresses) -;;; (get-host-name)) ;; no interfaces? -;;; ((eq? (length all-my-addresses) 1) -;;; (car all-my-addresses)) ;; only one to choose from, just go with it -;;; -;;; (else -;;; (car (sort all-my-addresses ip-pref-less?))) -;;; ;; (else -;;; ;; (ip->string (car (filter (lambda (x) ;; take any but 127. -;;; ;; (not (eq? (u8vector-ref x 0) 127))) -;;; ;; all-my-addresses)))) -;;; -;;; ))) -;;; -;;; (define (get-all-ips-sorted) -;;; (sort (get-all-ips) ip-pref-less?)) -;;; -;;; - + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +) ADDED utils/load-the-db.scm Index: utils/load-the-db.scm ================================================================== --- /dev/null +++ utils/load-the-db.scm @@ -0,0 +1,41 @@ +;; start the repl and then load this file + +(define start-time (current-seconds)) + +(let loop ((last-print 0) + (num-calls 0)) + (let* ((all-run-ids (rmt:get-all-run-ids)) + (do-print (> (- (current-seconds) last-print) 2)) + (max-query 0) + (num-calls (+ num-calls + 1 ;; account for call above + (length all-run-ids) ;; account for the get-tests-for-run in the for-each below + ))) + (for-each + (lambda (run-id) + ;; (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) + (let* ((all-run-data (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f #f #f))) + (set! num-calls (+ num-calls (length all-run-data))) + (for-each + (lambda (testdat) + (let* ((test-id (vector-ref testdat 0)) + (start-at (current-milliseconds)) + (testinfo (rmt:get-test-info-by-id run-id test-id)) + (query-time (- (current-milliseconds) start-at))) + (if (> query-time max-query) + (set! max-query query-time)))) + all-run-data) + (if do-print + (let* ((run-time (- (current-seconds) start-time)) + (qry-rate (if (> run-time 0) + (inexact->exact (round (/ num-calls run-time))) + -1))) + (print "Running "run-time"s, run "run-id + " has "(length all-run-data)" tests, max query "max-query + "ms with avg query rate "qry-rate" qry/s"))))) + all-run-ids) + (loop (if do-print + (current-seconds) + last-print) + num-calls))) + Index: utils/nbfake ================================================================== --- utils/nbfake +++ utils/nbfake @@ -39,10 +39,11 @@ nbfake behavior can be changed by setting the following env vars: NBFAKE_HOST SSH to \$NBFAKE_HOST and run command NBFAKE_LOG Logfile for nbfake output NB_WASH_GROUPS comma-separated list of groups to wash into NB_WASH_ENABLED must be set in order to enable wash groups + NBFAKE_QUIET set to suppress informational output __EOF exit fi @@ -87,19 +88,21 @@ #============================================================================== # Run and log #============================================================================== +if [[ -z "$NBFAKE_QUIET" ]];then cat <<__EOF >&2 #====================================================================== # NBFAKE logging command to: $MY_NBFAKE_LOG # $WASHCMD $* #====================================================================== __EOF +fi if [[ -z "$MY_NBFAKE_HOST" ]]; then # Run locally sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &" else # run remotely ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\"" fi Index: utils/plot-code.scm ================================================================== --- utils/plot-code.scm +++ utils/plot-code.scm @@ -23,21 +23,41 @@ ;; dot -Tpdf plot.dot > plot.pdf ;; first param is comma separated list of files to include in the map, use - to do all ;; second param is list of regexs for functions to include in the map ;; third param is list of files to scan -(use regex srfi-69 srfi-13) +;; (use regex srfi-69 srfi-1 srfi-13) + +(module plot-code + * + +(import scheme chicken.base chicken.port chicken.string chicken.io) +(import chicken.process-context) +(import regex srfi-1 srfi-69 srfi-13 matchable) +(define files #f) +(define targs #f) +(define function-patt #f) (define targs #f) -(define files (cdr (cddddr (argv)))) - -(let ((targdat (cadddr (argv)))) - (if (equal? targdat "-") - (set! targs files) - (set! targs (string-split targdat ",")))) - -(define function-patt (car (cdr (cdddr (argv))))) + +(match (command-line-arguments) + ((targfiles fnrx . scanfiles) + (set! targs (string-split-fields "," targfiles #:infix)) + (set! function-patt fnrx) + (set! files scanfiles)) + (else + (print "Usage: plot-code file1.scm,file2.scm *.scm > plot.dot + dot -Tpdf plot.dot > plot.pdf") + (exit))) + +;; (define files (cdr (cddddr (argv)))) +;; +;; (let ((targdat (cadddr (argv)))) +;; (if (equal? targdat "-") +;; (set! targs files) +;; (set! targs (string-split targdat ",")))) + (define function-rx (regexp function-patt)) (define filedat-defns (make-hash-table)) (define filedat-usages (make-hash-table)) (define defn-rx (regexp "^\\s*\\(define\\s+\\(([^\\s\\)]+).*")) @@ -197,5 +217,6 @@ function-calls) (print "}") (exit) +) ADDED utils/plot-uses.scm Index: utils/plot-uses.scm ================================================================== --- /dev/null +++ utils/plot-uses.scm @@ -0,0 +1,147 @@ +#!/mfs/pkgs/chicken/4.8.0.5/bin/csi -nbq + +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +;; Coming soon (right?) Usage: plot-code file1.scm,file2.scm "fun1,fun2,x*" *.scm > plot.dot +;; Usage: plot-code file1.scm,file2.scm *.scm > plot.dot +;; dot -Tpdf plot.dot > plot.pdf +;; first param is comma separated list of files to include in the map, use - to do all +;; second param is list of regexs for functions to include in the map +;; third param is list of files to scan + +(module plot-uses + * + +(import scheme chicken) + +(use regex srfi-69 srfi-13) +(use matchable data-structures ports extras) + +(define unituses-rx (regexp "^\\(declare \\((unit|uses) ([^\\(\\)]+)\\).*")) + +(define (print-err . data) + (with-output-to-port (current-error-port) + (lambda () + (apply print data)))) + +(define (process-file ignores fname) + (with-input-from-file fname + (lambda () + (let loop ((modname "DUMMYMOD")) + (let* ((inl (read-line))) + (if (eof-object? inl) + #t + (match (string-search unituses-rx inl) + ((_ dtype unitname) + (if (equal? dtype "unit") + (loop unitname) + (begin + (if (equal? dtype "uses") + (if (not (or (member modname '("DUMMYMOD")) + (member modname ignores) + (member unitname ignores))) + (print " \""unitname"\" -> \""modname"\";")) + (print-err "ERROR: bad declare line \""inl"\"")) + (loop modname)))) + (else + (loop modname))))))))) + +(define (main) + (match (command-line-arguments) + (("todot" ignoreunits . files) + (let* ((ignores (string-split ignoreunits ","))) + (print-err "Making graph for files: " (string-intersperse files ", ")) + (print "digraph uses_unit {") + (for-each + (lambda (fname) + (print "// Filename: "fname) + (process-file ignores fname)) + files) + (print "}"))) + (else + (print-err "Usage: plot-uses u1,u2... file1.scm ...") + (print-err " where u1,u2... are units to ignore and file1.scm... are the files to process.")))) + +(main) + +) +;; +;; ;; Gather the usages +;; (print "digraph G {") +;; (define curr-cluster-num 0) +;; (define function-calls '()) +;; +;; (for-each +;; (lambda (fname) +;; (let ((last-func #f)) +;; (print-err "Processing file " fname) +;; (print "subgraph cluster_" curr-cluster-num " {") +;; (set! curr-cluster-num (+ curr-cluster-num 1)) +;; (with-input-from-file fname +;; (lambda () +;; (with-output-to-port (current-error-port) +;; (lambda () +;; (print "Analyzing file " fname))) +;; (print "label=\"" fname "\";") +;; (let loop ((inl (read-line)) +;; (fnname "toplevel") +;; (allcalls '())) +;; (if (eof-object? inl) +;; (begin +;; (set! function-calls (cons (list fnname allcalls) function-calls)) +;; (for-each +;; (lambda (call-name) +;; (hash-table-set! breadcrumbs call-name #t)) +;; allcalls) +;; (print-err "function: " fnname " allcalls: " allcalls)) +;; (let ((match (string-match defn-rx inl))) +;; (if match +;; (let ((func-name (cadr match))) +;; (if last-func +;; (print "\"" func-name "\" -> \"" last-func "\";") +;; (print "\"" func-name "\";")) +;; (set! last-func func-name) +;; (hash-table-set! breadcrumbs func-name #t) +;; (loop (read-line) +;; func-name +;; allcalls)) +;; (let ((calls (look-for-all-calls inl fnname))) +;; (loop (read-line) fnname (append allcalls calls))))))))) +;; (print "}"))) +;; targs) +;; +;; (print-err "breadcrumbs: " (hash-table-keys breadcrumbs)) +;; (print-err "function-calls: " function-calls) +;; +;; (for-each +;; (lambda (function-call) +;; (print-err "function-call: " function-call) +;; (let ((fnname (car function-call)) +;; (calls (cadr function-call))) +;; (for-each +;; (lambda (callname) +;; (print (if (hash-table-ref/default breadcrumbs callname #f) "" "// ") +;; "\"" fnname "\" -> \"" callname "\";")) +;; calls))) +;; function-calls) +;; +;; (print "}") +;; +;; (exit) +;;