Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,16 +28,27 @@
ezsteps.scm lock-queue.scm rmt.scm api.scm \
subrun.scm portlogger.scm archive.scm env.scm \
diff-report.scm cgisetup/models/pgdb.scm
# module source files
-MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm
+MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
+ tcp-transportmod.scm
all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
+transport-mode.scm : transport-mode.scm.template
+ cp transport-mode.scm.template transport-mode.scm
+
+dashboard-transport-mode.scm : dashboard-transport-mode.scm.template
+ cp dashboard-transport-mode.scm.template dashboard-transport-mode.scm
+
+megatest.scm : transport-mode.scm
+dashboard.scm : dashboard-transport-mode.scm
+
# dbmod.import.o is just a hack here
mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
+configf.o : commonmod.import.o
db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o
# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \
@@ -96,12 +107,12 @@
@echo $(MTESTHASH)
dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm
csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o dboard
-mtut: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm
- csc $(CSCOPTS) $(OFILES) $(MOFILES) mtut.scm -o mtut
+mtut: $(OFILES) $(MOFILES) $(MOIMPFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm
+ csc $(CSCOPTS) $(OFILES) $(MOFILES) $(MOIMPFILES) mtut.scm -o mtut
# include makefile.inc
TCMTOBJS = \
api.o \
@@ -359,22 +370,22 @@
$(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \
fi
install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
$(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
- $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
$(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
$(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
$(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
$(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0
+
# $(PREFIX)/bin/.$(ARCHSTR)/ndboard
-
+# $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
# $(PREFIX)/bin/newdashboard
$(PREFIX)/bin/.$(ARCHSTR) :
mkdir -p $(PREFIX)/bin/.$(ARCHSTR)
mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib
Index: TODO
==================================================================
--- TODO
+++ TODO
@@ -16,10 +16,13 @@
# along with Megatest. If not, see .
TODO
====
+23WW07
+. Remove use of *dbstruct-dbs*
+
WW15
. fill newview matrix with data, filter pipeline gui elements
. improve [script], especially indent handling
WW16
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -18,21 +18,26 @@
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
;;======================================================================
-(use srfi-69 posix)
-
(declare (unit api))
(declare (uses rmt))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses tasks))
+(declare (uses tcp-transportmod))
(import dbmod)
(import dbfile)
+(import tcp-transportmod)
+
+(use srfi-69
+ posix
+ matchable
+ s11n)
;; allow these queries through without starting a server
;;
(define api:read-only-queries
'(get-key-val-pairs
@@ -39,10 +44,11 @@
get-var
get-keys
get-key-vals
test-toplevel-num-items
get-test-info-by-id
+ get-test-state-status-by-id
get-steps-info-by-id
get-data-info-by-id
test-get-rundir-from-test-id
get-count-tests-running-for-testname
get-count-tests-running
@@ -59,12 +65,12 @@
get-run-info
get-run-status
get-run-state
get-run-stats
get-run-times
- get-targets
get-target
+ get-targets
;; register-run
get-tests-tags
get-test-times
get-tests-for-run
get-tests-for-run-state-status
@@ -141,262 +147,322 @@
tasks-add
tasks-set-state-given-param-key
))
(define *db-write-mutexes* (make-hash-table))
-
+(define *server-signature* #f)
;; These are called by the server on recipt of /api calls
;; - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;; - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
(db:open-no-sync-db) ;; sets *no-sync-db*
-;; (handle-exceptions
-;; exn
-;; (let ((call-chain (get-call-chain)))
-;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
-;; (print-call-chain (current-error-port))
-;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+ ;; (handle-exceptions
+ ;; exn
+ ;; (let ((call-chain (get-call-chain)))
+ ;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
+ ;; (print-call-chain (current-error-port))
+ ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
(if (> *api-process-request-count* 200)
(begin
(if (common:low-noise-print 30 "too many threads")
(debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay."))
(thread-sleep! 0.5) ;; take a nap
))
- (cond
- ((not (vector? dat)) ;; it is an error to not receive a vector
- (vector #f (vector #f "remote must be called with a vector")))
- #;((> *api-process-request-count* 200) ;; 20)
- (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
- (set! *server-overloaded* #t)
- (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
- (else
- (let* ((cmd-in (vector-ref dat 0))
- (cmd (if (symbol? cmd-in)
- cmd-in
- (string->symbol cmd-in)))
- (params (vector-ref dat 1))
- (run-id (if (null? params)
- 0
- (car params)))
- (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id)
- (hash-table-ref *db-write-mutexes* run-id)
- (let* ((newmutex (make-mutex)))
- (hash-table-set! *db-write-mutexes* run-id newmutex)
- newmutex)))
- (start-t (current-milliseconds))
- (readonly-mode (dbr:dbstruct-read-only dbstruct))
- (readonly-command (member cmd api:read-only-queries))
- (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
- (if (not readonly-command)
- (mutex-lock! write-mutex))
- (let* ((res
- (if writecmd-in-readonly-mode
- (conc "attempt to run write command "cmd" on a read-only database")
- (case cmd
- ;;===============================================
- ;; READ/WRITE QUERIES
- ;;===============================================
-
- ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
-
- ;; SERVERS
- ((start-server) (apply server:kind-run params))
- ((kill-server) (set! *server-run* #f))
-
- ;; TESTS
-
- ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params))
- ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
- ((test-set-state-status-by-id)
-
- ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
- (db:set-state-status-and-roll-up-items
- dbstruct
- (list-ref params 0) ; run-id
- (list-ref params 1) ; test-name
- #f ; item-path
- (list-ref params 2) ; state
- (list-ref params 3) ; status
- (list-ref params 4) ; comment
- ))
-
- ((delete-test-records) (apply db:delete-test-records dbstruct params))
- ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
- ((test-set-state-status) (apply db:test-set-state-status dbstruct params))
- ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params))
- ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
- ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params))
- ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params))
- ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params))
-
- ;; RUNS
- ((register-run) (apply db:register-run dbstruct params))
- ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params))
- ((delete-run) (apply db:delete-run dbstruct params))
- ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params))
- ((update-run-event_time) (apply db:update-run-event_time dbstruct params))
- ((update-run-stats) (apply db:update-run-stats dbstruct params))
- ((set-var) (apply db:set-var dbstruct params))
- ((inc-var) (apply db:inc-var dbstruct params))
- ((dec-var) (apply db:dec-var dbstruct params))
- ((del-var) (apply db:del-var dbstruct params))
- ((add-var) (apply db:add-var dbstruct params))
-
- ;; STEPS
- ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params))
- ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params))
-
- ;; TEST DATA
- ((test-data-rollup) (apply db:test-data-rollup dbstruct params))
- ((csv->test-data) (apply db:csv->test-data dbstruct params))
-
- ;; MISC
- ((sync-inmem->db) (let ((run-id (car params)))
- (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
- ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params))
- ((create-all-triggers) (db:create-all-triggers dbstruct))
- ((drop-all-triggers) (db:drop-all-triggers dbstruct))
-
- ;; TESTMETA
- ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params))
- ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params))
- ((get-tests-tags) (db:get-tests-tags dbstruct))
-
- ;; TASKS
- ((tasks-add) (apply tasks:add dbstruct params))
- ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
- ((tasks-get-last) (apply tasks:get-last dbstruct params))
-
- ;; NO SYNC DB
- ((no-sync-set) (apply db:no-sync-set *no-sync-db* params))
- ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params))
- ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params))
- ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params))
-
- ;; ARCHIVES
- ;; ((archive-get-allocations)
- ((archive-register-disk) (apply db:archive-register-disk dbstruct params))
- ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
- ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
-
- ;;======================================================================
- ;; READ ONLY QUERIES
- ;;======================================================================
-
- ;; KEYS
- ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params))
- ((get-keys) (db:get-keys dbstruct))
- ((get-key-vals) (apply db:get-key-vals dbstruct params))
- ((get-target) (apply db:get-target dbstruct params))
- ((get-targets) (db:get-targets dbstruct))
-
- ;; ARCHIVES
- ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params))
-
- ;; TESTS
- ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params))
- ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params))
- ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params))
- ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
- ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params))
- ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
- ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params))
- ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params))
- ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
- ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params))
- ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params))
- ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params))
- ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params))
- ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
- ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params))
- ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
- ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params))
- ;; ((synchash-get) (apply synchash:server-get dbstruct params))
- ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params))
- ((get-test-times) (apply db:get-test-times dbstruct params))
-
- ;; RUNS
- ((get-run-info) (apply db:get-run-info dbstruct params))
- ((get-run-status) (apply db:get-run-status dbstruct params))
- ((get-run-state) (apply db:get-run-state dbstruct params))
- ((set-run-status) (apply db:set-run-status dbstruct params))
- ((set-run-state-status) (apply db:set-run-state-status dbstruct params))
- ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params))
- ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params))
- ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params))
- ((get-test-id) (apply db:get-test-id dbstruct params))
- ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params))
- ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params))
- ((get-runs) (apply db:get-runs dbstruct params))
- ((simple-get-runs) (apply db:simple-get-runs dbstruct params))
- ((get-num-runs) (apply db:get-num-runs dbstruct params))
- ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params))
- ((get-all-run-ids) (db:get-all-run-ids dbstruct))
- ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params))
- ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params))
- ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params))
- ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params))
- ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params))
- ((get-var) (apply db:get-var dbstruct params))
- ((get-run-stats) (apply db:get-run-stats dbstruct params))
- ((get-run-times) (apply db:get-run-times dbstruct params))
-
- ;; STEPS
- ((get-steps-data) (apply db:get-steps-data dbstruct params))
- ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params))
- ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params))
-
- ;; TEST DATA
- ((read-test-data) (apply db:read-test-data dbstruct params))
- ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params))
- ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params))
-
- ;; MISC
- ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params))
- ((have-incompletes?) (apply db:have-incompletes? dbstruct params))
- ((login) (apply db:login dbstruct params))
- ((general-call) (let ((stmtname (car params))
- (run-id (cadr params))
- (realparams (cddr params)))
- (db:general-call dbstruct run-id stmtname realparams)))
- ((sdb-qry) (apply sdb:qry params))
- ((ping) (current-process-id))
- ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params))
- ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params))
- ;; TESTMETA
- ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params))
-
- ;; TASKS
- ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params))
- (else
- (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
- (conc "ERROR: BAD api call " cmd))))))
- (if (not readonly-command)
- (mutex-unlock! write-mutex))
-
- ;; save all stats
- (let ((delta-t (- (current-milliseconds)
- start-t))
- (modified-cmd (if (eq? cmd 'general-call)
- (string->symbol (conc "general-call-" (car params)))
- cmd)))
- (hash-table-set! *db-api-call-time* modified-cmd
- (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
- (if writecmd-in-readonly-mode
- (begin
- #;(common:telemetry-log (conc "api-out:"(->string cmd))
- payload: `((params . ,params)
- (ok-res . #t)))
- (vector #f res))
- (begin
- #;(common:telemetry-log (conc "api-out:"(->string cmd))
- payload: `((params . ,params)
- (ok-res . #f)))
- (vector #t res))))))))
+ (cond
+ ((not (vector? dat)) ;; it is an error to not receive a vector
+ (vector #f (vector #f "remote must be called with a vector")))
+ #;((> *api-process-request-count* 200) ;; 20)
+ (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
+ (set! *server-overloaded* #t)
+ (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
+ (else
+ (let* ((cmd-in (vector-ref dat 0))
+ (cmd (if (symbol? cmd-in)
+ cmd-in
+ (string->symbol cmd-in)))
+ (params (vector-ref dat 1))
+ (run-id (if (null? params)
+ 0
+ (car params)))
+ (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id)
+ (hash-table-ref *db-write-mutexes* run-id)
+ (let* ((newmutex (make-mutex)))
+ (hash-table-set! *db-write-mutexes* run-id newmutex)
+ newmutex)))
+ (start-t (current-milliseconds))
+ (readonly-mode (dbr:dbstruct-read-only dbstruct))
+ (readonly-command (member cmd api:read-only-queries))
+ (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
+ (if (not readonly-command)
+ (mutex-lock! write-mutex))
+ (let* ((tmppath (dbr:dbstruct-tmppath dbstruct))
+ (clean-run-id (cond
+ ((number? run-id) run-id)
+ ((equal? run-id #f) "main")
+ (else "other")))
+ (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params)))
+ (res
+ (if writecmd-in-readonly-mode
+ (conc "attempt to run write command "cmd" on a read-only database")
+ (api:dispatch-request dbstruct cmd run-id params))))
+ (delete-file* crumbfile)
+ (if (not readonly-command)
+ (mutex-unlock! write-mutex))
+
+ ;; save all stats
+ (let ((delta-t (- (current-milliseconds)
+ start-t))
+ (modified-cmd (if (eq? cmd 'general-call)
+ (string->symbol (conc "general-call-" (car params)))
+ cmd)))
+ (hash-table-set! *db-api-call-time* modified-cmd
+ (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
+ (if writecmd-in-readonly-mode
+ (begin
+ #;(common:telemetry-log (conc "api-out:"(->string cmd))
+ payload: `((params . ,params)
+ (ok-res . #t)))
+ (vector #f res))
+ (begin
+ #;(common:telemetry-log (conc "api-out:"(->string cmd))
+ payload: `((params . ,params)
+ (ok-res . #f)))
+ (vector #t res))))))))
+
+;; indat is (cmd run-id params meta)
+;;
+;; WARNING: Do not print anything in the lambda of this function as it
+;; reads/writes to current in/out port
+;;
+(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params)
+ (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.")
+ (if (not *server-signature*)
+ (set! *server-signature* (tt:mk-signature *toppath*)))
+ (lambda ()
+ (let* ((indat (deserialize))
+ (newcount (+ *api-process-request-count* 1))
+ (delay-wait (if (> newcount 10)
+ (- newcount 10)
+ 0)))
+ (set! *api-process-request-count* newcount)
+ (set! *db-last-access* (current-seconds))
+ (match indat
+ ((cmd run-id params meta)
+ (let* ((ttdat *server-info*)
+ (server-state (tt-state ttdat))
+ (status (cond
+ ((> newcount 30) 'busy)
+ ((> newcount 15) 'loaded)
+ (else 'ok)))
+ (errmsg (case status
+ ((busy) (conc "Server overloaded, "newcount" threads in flight"))
+ ((loaded) (conc "Server loaded, "newcount" threads in flight"))
+ (else #f)))
+ (result (case status
+ ((busy) (- newcount 29))
+ ((loaded) #f)
+ (else
+ (case cmd
+ ((ping) *server-signature*)
+ (else
+ (api:dispatch-request dbstruct cmd run-id params))))))
+ (meta (case cmd
+ ((ping) `((sstate . ,server-state)))
+ (else `((wait . ,delay-wait)))))
+ (payload (list status errmsg result meta)))
+ (set! *api-process-request-count* (- *api-process-request-count* 1))
+ (serialize payload)))
+ (else
+ (assert #f "FATAL: failed to deserialize indat "indat))))))
+
+
+(define (api:dispatch-request dbstruct cmd run-id params)
+ (db:open-no-sync-db)
+ (case cmd
+ ;;===============================================
+ ;; READ/WRITE QUERIES
+ ;;===============================================
+
+ ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
+
+ ;; SERVERS
+ ((start-server) (apply server:kind-run params))
+ ((kill-server) (set! *server-run* #f))
+
+ ;; TESTS
+
+ ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params))
+ ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
+ ((test-set-state-status-by-id)
+
+ ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
+ (db:set-state-status-and-roll-up-items
+ dbstruct
+ (list-ref params 0) ; run-id
+ (list-ref params 1) ; test-name
+ #f ; item-path
+ (list-ref params 2) ; state
+ (list-ref params 3) ; status
+ (list-ref params 4) ; comment
+ ))
+
+ ((delete-test-records) (apply db:delete-test-records dbstruct params))
+ ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
+ ((test-set-state-status) (apply db:test-set-state-status dbstruct params))
+ ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params))
+ ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
+ ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params))
+ ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params))
+ ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params))
+
+ ;; RUNS
+ ((register-run) (apply db:register-run dbstruct params))
+ ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params))
+ ((delete-run) (apply db:delete-run dbstruct params))
+ ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params))
+ ((update-run-event_time) (apply db:update-run-event_time dbstruct params))
+ ((update-run-stats) (apply db:update-run-stats dbstruct params))
+ ((set-var) (apply db:set-var dbstruct params))
+ ((inc-var) (apply db:inc-var dbstruct params))
+ ((dec-var) (apply db:dec-var dbstruct params))
+ ((del-var) (apply db:del-var dbstruct params))
+ ((add-var) (apply db:add-var dbstruct params))
+
+ ;; STEPS
+ ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params))
+ ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params))
+
+ ;; TEST DATA
+ ((test-data-rollup) (apply db:test-data-rollup dbstruct params))
+ ((csv->test-data) (apply db:csv->test-data dbstruct params))
+
+ ;; MISC
+ ((sync-inmem->db) (let ((run-id (car params)))
+ (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
+ ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params))
+ ((create-all-triggers) (db:create-all-triggers dbstruct))
+ ((drop-all-triggers) (db:drop-all-triggers dbstruct))
+
+ ;; TESTMETA
+ ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params))
+ ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params))
+ ((get-tests-tags) (db:get-tests-tags dbstruct))
+
+ ;; TASKS
+ ((tasks-add) (apply tasks:add dbstruct params))
+ ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
+ ((tasks-get-last) (apply tasks:get-last dbstruct params))
+
+ ;; NO SYNC DB
+ ((no-sync-set) (apply db:no-sync-set *no-sync-db* params))
+ ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params))
+ ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params))
+ ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params))
+
+ ;; ARCHIVES
+ ;; ((archive-get-allocations)
+ ((archive-register-disk) (apply db:archive-register-disk dbstruct params))
+ ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
+ ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
+
+ ;;======================================================================
+ ;; READ ONLY QUERIES
+ ;;======================================================================
+
+ ;; KEYS
+ ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params))
+ ((get-keys) (db:get-keys dbstruct))
+ ((get-key-vals) (apply db:get-key-vals dbstruct params))
+ ((get-target) (apply db:get-target dbstruct params))
+ ((get-targets) (db:get-targets dbstruct))
+
+ ;; ARCHIVES
+ ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params))
+
+ ;; TESTS
+ ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params))
+ ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params))
+ ((get-test-state-status-by-id) (apply db:get-test-state-status-by-id dbstruct params))
+ ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params))
+ ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
+ ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params))
+ ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
+ ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params))
+ ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params))
+ ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
+ ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params))
+ ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params))
+ ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params))
+ ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params))
+ ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
+ ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params))
+ ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
+ ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params))
+ ;; ((synchash-get) (apply synchash:server-get dbstruct params))
+ ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params))
+ ((get-test-times) (apply db:get-test-times dbstruct params))
+
+ ;; RUNS
+ ((get-run-info) (apply db:get-run-info dbstruct params))
+ ((get-run-status) (apply db:get-run-status dbstruct params))
+ ((get-run-state) (apply db:get-run-state dbstruct params))
+ ((get-run-state-status) (apply db:get-run-state-status dbstruct params))
+ ((set-run-status) (apply db:set-run-status dbstruct params))
+ ((set-run-state-status) (apply db:set-run-state-status dbstruct params))
+ ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db dbstruct params))
+ ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params))
+ ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params))
+ ((get-test-id) (apply db:get-test-id dbstruct params))
+ ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params))
+ ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params))
+ ((get-runs) (apply db:get-runs dbstruct params))
+ ((simple-get-runs) (apply db:simple-get-runs dbstruct params))
+ ((get-num-runs) (apply db:get-num-runs dbstruct params))
+ ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params))
+ ((get-all-run-ids) (db:get-all-run-ids dbstruct))
+ ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params))
+ ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params))
+ ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params))
+ ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params))
+ ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params))
+ ((get-var) (apply db:get-var dbstruct params))
+ ((get-run-stats) (apply db:get-run-stats dbstruct params))
+ ((get-run-times) (apply db:get-run-times dbstruct params))
+
+ ;; STEPS
+ ((get-steps-data) (apply db:get-steps-data dbstruct params))
+ ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params))
+ ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params))
+
+ ;; TEST DATA
+ ((read-test-data) (apply db:read-test-data dbstruct params))
+ ((read-test-data-varpatt) (apply db:read-test-data-varpatt dbstruct params))
+ ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params))
+
+ ;; MISC
+ ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params))
+ ((have-incompletes?) (apply db:have-incompletes? dbstruct params))
+ ((login) (apply db:login dbstruct params))
+ ((general-call) (let ((stmtname (car params))
+ (run-id (cadr params))
+ (realparams (cddr params)))
+ (db:general-call dbstruct run-id stmtname realparams)))
+ ((sdb-qry) (apply sdb:qry params))
+ ((ping) (current-process-id))
+ ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params))
+ ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params))
+ ;; TESTMETA
+ ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params))
+
+ ;; TASKS
+ ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params))
+ (else
+ (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
+ (conc "ERROR: BAD api call " cmd))))
;; http-server send-response
;; api:process-request
;; db:*
;;
Index: archive.scm
==================================================================
--- archive.scm
+++ archive.scm
@@ -21,10 +21,12 @@
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 format md5 message-digest srfi-18)
(declare (unit archive))
(declare (uses db))
(declare (uses common))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(include "db_records.scm")
;;======================================================================
ADDED artifacts.scm
Index: artifacts.scm
==================================================================
--- /dev/null
+++ artifacts.scm
@@ -0,0 +1,24 @@
+;;======================================================================
+;; Copyright 2019, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+
+(declare (unit artifacts))
+
+(include "artifacts/artifacts.scm")
+
ADDED artifacts/README
Index: artifacts/README
==================================================================
--- /dev/null
+++ artifacts/README
@@ -0,0 +1,1 @@
+NOTE: keep megatest/artifacts/ in sync with datastore/artifacts
ADDED artifacts/artifacts.meta
Index: artifacts/artifacts.meta
==================================================================
--- /dev/null
+++ artifacts/artifacts.meta
@@ -0,0 +1,21 @@
+;; -*- scheme -*-
+(
+; Your egg's license:
+(license "BSD")
+
+; Pick one from the list of categories (see below) for your egg and enter it
+; here.
+(category db)
+
+; A list of eggs pkts depends on. If none, you can omit this declaration
+; altogether. If you are making an egg for chicken 3 and you need to use
+; procedures from the `files' unit, be sure to include the `files' egg in the
+; `needs' section (chicken versions < 3.4.0 don't provide the `files' unit).
+; `depends' is an alias to `needs'.
+;; (needs (autoload "3.0"))
+
+; A list of eggs required for TESTING ONLY. See the `Tests' section.
+(test-depends test)
+
+(author "Matt Welland")
+(synopsis "A sha1-chain based datastore similar to the data format in fossil scm, consisting of artifacts of single line cards."))
ADDED artifacts/artifacts.release-info
Index: artifacts/artifacts.release-info
==================================================================
--- /dev/null
+++ artifacts/artifacts.release-info
@@ -0,0 +1,3 @@
+(repo fossil "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}")
+(uri zip "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}/zip/{egg-name}.zip?uuid={egg-release}")
+(release "1.0")
ADDED artifacts/artifacts.scm
Index: artifacts/artifacts.scm
==================================================================
--- /dev/null
+++ artifacts/artifacts.scm
@@ -0,0 +1,1624 @@
+;; Copyright 2006-2017, Matthew Welland.
+;;
+;; This file is part of artifacts
+;;
+;; Pkts is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Pkts is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Pkts. If not, see .
+;;
+
+;; CARDS:
+;;
+;; A card is a line of text, the first two characters are a letter followed by a
+;; space. The letter is the card type.
+;;
+;; artifact:
+;;
+;; An artifact is a sorted list of cards with a final card Z that contains the shar1 hash
+;; of all of the preceding cards.
+;;
+;; AARTIFACT:
+;;
+;; An alist mapping card types to card data
+;; '((T . "artifacttype")
+;; (a . "some content"))
+;;
+;; EARTIFACT:
+;;
+;; Extended packet using friendly keys. Must use a artifactspec to convert to/from eartifacts
+;; '((ptype . "artifacttype")
+;; (adata . "some content))
+;;
+;; DARTIFACT:
+;;
+;; artifacts pulled from the database have this format:
+;;
+;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist
+;; (t . "v1.63/tip/dev")
+;; (c . "QUICKPATT")
+;; (T . "runstart")
+;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd")
+;; (D . "1488995096.0"))
+;; (id . 8)
+;; (group-id . 0)
+;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b")
+;; (parent . "")
+;; (artifact-type . "runstart")
+;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b"))
+;;
+;; artifactspec is alist of alists mapping types and nicekeys to keys
+;;
+;; '((posting . ((title . t)
+;; (url . u)
+;; (blurb . b)))
+;; (comment . ((comment . c)
+;; (score . s))))
+
+;; Reserved cards:
+;; P : artifact parent
+;; R : reference artifact containing mapping of short string -> sha1sum strings
+;; T : artifact type
+;; D : current time from (current-time), unless provided
+;; Z : shar1 hash of the packet
+
+;; Example usage:
+;;
+;; Create a artifact:
+;;
+;; (use artifacts)
+;; (define-values (uuid artifact)
+;; (alist->artifact
+;; '((fruit . "apple") (meat . "beef")) ;; this is the data to convert
+;; '((foods (fruit . f) (meat . m))) ;; this is the artifact spec
+;; ptype:
+;; 'foods))
+;;
+;; Add to artifact queue:
+;;
+;; (define db (open-queue-db "/tmp/artifacts" "artifacts.db"))
+;; (add-to-queue db artifact uuid 'foods #f 0) ;; no parent and use group_id of 0
+;;
+;; Retrieve the packet from the db and extract a value:
+;;
+;; (alist-ref
+;; 'meat
+;; (dartifact->alist
+;; (car (get-dartifacts db #f 0 #f))
+;; '((foods (fruit . f)
+;; (meat . m)))))
+;; => "beef"
+;;
+
+(module artifacts
+(
+;; cards, util and misc
+;; sort-cards
+;; calc-sha1
+;;
+;; low-level constructor procs, exposed only for development/testing, will be removed
+construct-sdat
+construct-artifact
+card->type/value
+add-z-card
+
+;; queue database procs
+open-queue-db
+add-to-queue
+create-and-queue
+;; lookup-by-uuid
+lookup-by-id
+get-dartifacts
+get-not-processed-artifacts
+get-related
+find-artifacts
+process-artifacts
+get-descendents
+get-ancestors
+get-artifacts
+;; get-last-descendent
+;; with-queue-db
+;; load-artifacts-to-db
+
+;; procs that operate directly on artifacts, sdat, aartifacts, dartifacts etc.
+artifact->alist ;; artifact -> aartifact (i.e. alist)
+artifact->sdat ;; artifact -> '("a aval" "b bval" ...)
+sdat->alist ;; '("a aval" "b bval"...) -> ((a . "aval")(b . "bval") ...)
+dblst->dartifacts ;; convert list of tuples from queue db into dartifacts
+dartifact->alist ;; flatten a dartifact into an alist containing all db fields and the artifact alist
+dartifacts->alists ;; apply dartifact->alist to a list of alists using a artifact-spec
+alist->artifact ;; returns two values uuid, artifact
+get-value ;; looks up a value given a key in a dartifact
+flatten-all ;; merge the list of values from a query which includes a artifact into a flat alist <== really useful!
+check-artifact
+
+;; artifact alists
+write-alist->artifact
+read-artifact->alist
+
+;; archive database
+;; archive-open-db
+;; write-archive-artifacts
+;; archive-artifacts
+;; mark-processed
+
+;; artifactsdb
+artifactdb-conn ;; useful
+artifactdb-fname
+artifactsdb-open
+artifactsdb-close
+artifactsdb-add-record
+;; temporary
+artifactdb-artifactspec
+
+;; utility procs
+increment-string ;; used to get indexes for strings in ref artifacts
+make-report ;; make a .dot file
+calc-sha1
+uuid-first-two-letters
+uuid-remaining-letters
+
+;; file and directory utils
+multi-glob
+capture-dir
+file-get-sha1
+check-same
+link-or-copy
+same-partition?
+link-if-same-partition
+archive-copy
+write-to-archive
+artifact-rollup
+read-artifacts-into-hash
+hash-of-artifacts->bundle
+archive-dest
+
+;; pathname-full-filename
+
+;; minimal artifact functions
+minimal-artifact-read
+minimal-artifact->alist
+afact-get-D
+afact-get-Z
+afact-get-T
+afact-get
+afact-get-number/default
+
+
+;; bundles
+write-bundle
+read-bundle
+
+;; new artifacts db
+with-todays-adb
+get-all-artifacts
+refresh-artifacts-db
+
+)
+
+(import (chicken base) scheme (chicken process) (chicken time posix)
+ (chicken io) (chicken file) (chicken pathname)
+ chicken.process-context.posix (chicken string)
+ (chicken time) (chicken sort) (chicken file posix) (chicken condition) srfi-1
+ regex srfi-13 srfi-69 (chicken port) (chicken process-context)
+ crypt sha1 matchable message-digest sqlite3 typed-records
+ directory-utils
+ scsh-process)
+
+;;======================================================================
+;; DATA MANIPULATION UTILS
+;;======================================================================
+
+(define-inline (unescape-data data)
+ (string-translate* data '(("\\n" . "\n") ("\\\\" . "\\"))))
+
+(define-inline (escape-data data)
+ (string-translate* data '(("\n" . "\\n") ("\\" . "\\\\"))))
+
+(define-inline (make-card type data)
+ (conc type " " (escape-data (->string data))))
+
+;; reverse an alist for doing artifactkey -> external key conversions
+;;
+(define-inline (reverse-aspec aspec)
+ (map (lambda (dat)
+ (cons (cdr dat)(car dat)))
+ aspec))
+
+;; add a card to the list of cards, sdat
+;; if type is #f return only sdat
+;; if data is #f return only sdat
+;;
+(define-inline (add-card sdat type data)
+ (if (and type data)
+ (cons (make-card type data) sdat)
+ sdat))
+
+;;======================================================================
+;; STRING AS FUNKY NUMBER
+;;======================================================================
+
+;; NOTE: PTDZ are removed as they are reserved. NB// the R card is not used in a
+;; ref, instead the P parent card is used.
+;; Question: Why does it matter to remove PTDZ?
+;; To make the ref easier to use the ref strings will be the keys
+;; so we cannot have overlap with any actual keys. But this is a
+;; bit silly. What we need to do instead is reject keys of length
+;; one where the char is in PTDZ
+;;
+;; This is basically base92
+;;
+(define string-num-chars (string->list "!#$%&'()*+,-./0123456789:;<=>?@ABCEFGHIJKLMNOQRSUVWXY[\\]^_abcdefghijklmnopqrstuvwxyz{|}~"))
+;; "0123456789abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOQSUVWXY!#$%&'()*+,-./[]:;<=>?\\^_{}|"))
+
+(define (char-incr inchar)
+ (let* ((carry #f)
+ (next-char (let ((rem (member inchar string-num-chars)))
+ (if (eq? (length rem) 1) ;; we are at the last character in our string-num-chars list
+ (begin
+ (set! carry #t)
+ (car string-num-chars))
+ (cadr rem)))))
+ (values next-char carry)))
+
+(define (increment-string str)
+ (if (string-null? str)
+ "0"
+ (let ((strlst (reverse (string->list str)))) ;; need to process the string from the lsd
+ (list->string
+ (let loop ((hed (car strlst))
+ (tal (cdr strlst))
+ (res '()))
+ (let-values (((newhed carry)(char-incr hed)))
+ ;; (print "newhed: " newhed " carry: " carry " tal: " tal)
+ (let ((newres (cons newhed res)))
+ (if carry ;; we'll have to propagate the carry
+ (if (null? tal) ;; at the end, tack on "0" (which is really a "1")
+ (cons (car string-num-chars) newres)
+ (loop (car tal)(cdr tal) newres))
+ (append (reverse tal) newres)))))))))
+
+;;======================================================================
+;; P K T S D B I N T E R F A C E
+;;
+;; INTEGER, REAL, TEXT
+;;======================================================================
+;;
+;; spec
+;; ( (tablename1 . (field1name L1 TYPE)
+;; (field2name L2 TYPE) ... )
+;; (tablename2 ... ))
+;;
+;; Example: (tests (testname n TEXT)
+;; (rundir r TEXT)
+;; ... )
+;;
+;; artifact keys are taken from the first letter, if that is not unique
+;; then look at the next letter and so on
+;;
+
+;; simplify frequent need to get one result with default
+;;
+(define (get-one db default qry . params)
+ (apply fold-row
+ car
+ default
+ db
+ qry
+ params))
+
+(define (get-rows db qry . params)
+ (apply fold-row
+ cons
+ db
+ qry
+ params))
+
+;; use this struct to hold the artifactspec and the db handle
+;;
+(defstruct artifactdb
+ (fname #f)
+ (artifactsdb-spec #f)
+ (artifactspec #f) ;; cache the artifactspec
+ (field-keys #f) ;; cache the field->key mapping (field1 . k1) ...
+ (key-fields #f) ;; cache the key->field mapping
+ (conn #f)
+ )
+
+;; WARNING: There is a simplification in the artifactsdb spec w.r.t. artifactspec.
+;; The field specs are the cdr of the table list - not a full
+;; list. The extra list level in artifactspec is gratuitous and should
+;; be removed.
+;;
+(define (artifactsdb-spec->artifactspec tables-spec)
+ (map (lambda (tablespec)
+ (list (car tablespec)
+ (map (lambda (field-spec)
+ (cons (car field-spec)(cadr field-spec)))
+ (cdr tablespec))))
+ tables-spec))
+
+(define (artifactsdb-open dbfname artifactsdb-spec)
+ (let* ((pdb (make-artifactdb))
+ (dbexists (file-exists? dbfname))
+ (db (open-database dbfname)))
+ (artifactdb-artifactsdb-spec-set! pdb artifactsdb-spec)
+ (artifactdb-artifactspec-set! pdb (artifactsdb-spec->artifactspec artifactsdb-spec))
+ (artifactdb-fname-set! pdb dbfname)
+ (artifactdb-conn-set! pdb db)
+ (if (not dbexists)
+ (artifactsdb-init pdb))
+ pdb))
+
+(define (artifactsdb-init artifactsdb)
+ (let* ((db (artifactdb-conn artifactsdb))
+ (artifactsdb-spec (artifactdb-artifactsdb-spec artifactsdb)))
+ ;; create a table for the artifacts themselves
+ (execute db "CREATE TABLE IF NOT EXISTS artifacts (id INTEGER PRIMARY KEY, zkey TEXT, record_id INTEGER, artifact TEXT);")
+ (for-each
+ (lambda (table)
+ (let* ((table-name (car table))
+ (fields (cdr table))
+ (stmt (conc "CREATE TABLE IF NOT EXISTS "
+ table-name
+ " (id INTEGER PRIMARY KEY,"
+ (string-intersperse
+ (map (lambda (fieldspec)
+ (conc (car fieldspec) " "
+ (caddr fieldspec)))
+ fields)
+ ",")
+ ");")))
+ (execute db stmt)))
+ artifactsdb-spec)))
+
+;; create artifact from the data and insert into artifacts table
+;;
+;; data is assoc list of (field . value) ...
+;; tablename is a symbol matching the table name
+;;
+(define (artifactsdb-add-record artifactsdb tablename data #!optional (parent #f))
+ (let*-values (((zkey artifact) (alist->artifact data (artifactdb-artifactspec artifactsdb) ptype: tablename)))
+ ;; have the data as alist so insert it into appropriate table also
+ (let* ((db (artifactdb-conn artifactsdb)))
+ ;; TODO: Address collisions
+ (execute db "INSERT INTO artifacts (zkey,artifact,record_id) VALUES (?,?,?);"
+ zkey artifact -1)
+ (let* (;; (artifactid (artifactsdb-artifactkey->artifactid artifactsdb artifactkey))
+ (record-id (artifactsdb-insert artifactsdb tablename data)))
+ (execute db "UPDATE artifacts SET record_id=? WHERE zkey=?;"
+ record-id zkey)
+ ))))
+
+;;
+(define (artifactsdb-insert artifactsdb tablename data)
+ (let* ((db (artifactdb-conn artifactsdb))
+ (stmt (conc "INSERT INTO " tablename
+ " (" (string-intersperse (map conc (map car data)) ",")
+ ") VALUES ('"
+ ;; TODO: Add lookup of data type and do not
+ ;; wrap integers with quotes
+ (string-intersperse (map conc (map cdr data)) "','")
+ "');")))
+ (print "stmt: " stmt)
+ (execute db stmt)
+ ;; lookup the record-id and return it
+
+ ))
+
+(define (artifactsdb-close artifactsdb)
+ (finalize! (artifactdb-conn artifactsdb)))
+
+;; (let loop ((s "0")(n 0))(print s)(if (< n 5000)(loop (increment-string s)(+ n 1))))
+
+;;======================================================================
+;; CARDS, MISC and UTIL
+;;======================================================================
+
+;; given string (likely multi-line) "dat" return shar1 hash
+;;
+(define (calc-sha1 instr)
+ (message-digest-string
+ (sha1-primitive)
+ instr))
+
+;; given a single card return its type and value
+;;
+(define (card->type/value card)
+ (let ((ctype (substring card 0 1))
+ (cval (substring card 2 (string-length card))))
+ (values (string->symbol ctype) cval)))
+
+;;======================================================================
+;; SDAT procs
+;; sdat is legacy/internal usage. Intention is to remove sdat calls from
+;; the exposed calls.
+;;======================================================================
+
+;; sort list of cards
+;;
+(define-inline (sort-cards sdat)
+ (sort sdat string<=?))
+
+;; artifact rules
+;; 1. one card per line
+;; 2. at least one card
+;; 3. no blank lines
+
+;; given sdat, a list of cards return uuid, packet (as sdat)
+;;
+(define (add-z-card sdat)
+ (let* ((sorted-sdat (sort-cards sdat))
+ (dat (string-intersperse sorted-sdat "\n"))
+ (uuid (calc-sha1 dat)))
+ (values
+ uuid
+ (conc
+ dat
+ "\nZ "
+ uuid))))
+
+(define (check-artifact artifact)
+ (handle-exceptions
+ exn
+ #f ;; anything goes wrong - call it a crappy artifact
+ (let* ((sdat (string-split artifact "\n"))
+ (rdat (reverse sdat)) ;; reversed
+ (zdat (car rdat))
+ (Z (cadr (string-split zdat)))
+ (cdat (string-intersperse (reverse (cdr rdat)) "\n")))
+ (equal? Z (calc-sha1 cdat)))))
+
+;;======================================================================
+;; AARTIFACTs
+;;======================================================================
+
+;; convert a sdat (list of cards) to an alist
+;;
+(define (sdat->alist sdat)
+ (let loop ((hed (car sdat))
+ (tal (cdr sdat))
+ (res '()))
+ (let-values (( (ctype cval)(card->type/value hed) ))
+ ;; if this card is not one of the common ones tack it on to rem
+ (let* ((oldval (alist-ref ctype res))
+ (newres (cons (cons ctype
+ (if oldval ;; list or string
+ (if (list? oldval)
+ (cons cval oldval)
+ (cons cval (list oldval)))
+ cval))
+ res)))
+ (if (null? tal)
+ newres
+ (loop (car tal)(cdr tal) newres))))))
+
+;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist
+;; (t . "v1.63/tip/dev")
+;; (c . "QUICKPATT")
+;; (T . "runstart")
+;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd")
+;; (D . "1488995096.0"))
+;; (id . 8)
+;; (group-id . 0)
+;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b")
+;; (parent . "")
+;; (artifact-type . "runstart")
+;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b"))
+;;
+;; artifactspec is alist of alists mapping types and nicekeys to keys
+;;
+;; '((posting . ((title . t)
+;; (url . u)
+;; (blurb . b)))
+;; (comment . ((comment . c)
+;; (score . s))))
+
+;; DON'T USE?
+;;
+(define (get-value field dartifact . spec-in)
+ (if (null? spec-in)
+ (alist-ref field dartifact)
+ (let* ((spec (car spec-in))
+ (aartifact (alist-ref 'aartifact dartifact))) ;; get the artifact alist
+ (if (and aartifact spec)
+ (let* ((ptype (alist-ref 'artifact-type dartifact))
+ (pspec (alist-ref (string->symbol ptype) spec))) ;; do we have a spec for this type of artifact
+ (and pspec
+ (let* ((key (alist-ref field pspec)))
+ (and key (alist-ref key aartifact)))))
+ #f))))
+
+;; convert a dartifact to a pure alist given a artifactspec
+;; this flattens out the alist to include the data from
+;; the queue database record
+;;
+(define (dartifact->alist dartifact artifactspec)
+ (let* ((aartifact (alist-ref 'aartifact dartifact))
+ (artifact-type (or (alist-ref 'artifact-type dartifact) ;; artifact-type is from the database field artifact_type
+ (alist-ref 'T aartifact)))
+ (artifact-fields (alist-ref (string->symbol artifact-type) artifactspec))
+ (rev-fields (if artifact-fields
+ (reverse-aspec artifact-fields)
+ '())))
+ (append (map (lambda (entry)
+ (let* ((artifact-key (car entry))
+ (new-key (or (alist-ref artifact-key rev-fields) artifact-key)))
+ `(,new-key . ,(cdr entry))))
+ aartifact)
+ dartifact)))
+
+;; convert a list of dartifacts into a list of alists using artifact-spec
+;;
+(define (dartifacts->alists dartifacts artifact-spec)
+ (map (lambda (x)
+ (dartifact->alist x artifact-spec))
+ dartifacts))
+
+;; Generic flattener, make the tuple and artifact into a single flat alist
+;;
+;; qry-result-spec is a list of symbols corresponding to each field
+;;
+(define (flatten-all inlst artifactspec . qry-result-spec)
+ (map
+ (lambda (tuple)
+ (dartifact->alist
+ (apply dblst->dartifacts tuple qry-result-spec)
+ artifactspec))
+ inlst))
+
+;; call like this:
+;; (construct-sdat 'a "a data" 'S "S data" ...)
+;; returns list of cards
+;; ( "A a value" "D 12345678900" ...)
+;;
+(define (construct-sdat . alldat)
+ (let ((have-D-card #f)) ;; flag
+ (if (even? (length alldat))
+ (let loop ((type (car alldat))
+ (data (cadr alldat))
+ (tail (cddr alldat))
+ (res '()))
+ (if (eq? type 'D)(set! have-D-card #t))
+ (if (null? tail)
+ (if have-D-card ;; return the constructed artifact, add a D card if none found
+ (add-card res type data)
+ (add-card
+ (add-card res 'D (current-seconds))
+ type data))
+ (loop (car tail)
+ (cadr tail)
+ (cddr tail)
+ (add-card res type data))))
+ #f))) ;; #f means it failed to create the sdat
+
+(define (construct-artifact . alldat)
+ (add-z-card
+ (apply construct-sdat alldat)))
+
+;;======================================================================
+;; CONVERTERS
+;;======================================================================
+
+(define (artifact->sdat artifact)
+ (map unescape-data (string-split artifact "\n")))
+
+;; given a pure artifact return an alist
+;;
+(define (artifact->alist artifact #!key (artifactspec #f))
+ (let ((sdat (cond
+ ((string? artifact) (artifact->sdat artifact))
+ ((list? artifact) artifact)
+ (else #f))))
+ (if artifact
+ (if artifactspec
+ (dartifact->alist (list (cons 'aartifact (sdat->alist sdat))) artifactspec)
+ (sdat->alist sdat))
+ #f)))
+
+;; convert an alist to an sdat
+;; in: '((a . "blah")(b . "foo"))
+;; out: '("a blah" "b foo")
+;;
+(define (alist->sdat adat)
+ (map (lambda (dat)
+ (conc (car dat) " " (cdr dat)))
+ adat))
+
+;; adat is the incoming alist, aspec is the mapping
+;; from incoming key to the artifact key (usually one
+;; letter to keep data tight) see the artifactspec at the
+;; top of this file
+;;
+;; NOTE: alists can contain multiple instances of the same key (supported fine by artifacts)
+;; but you (obviously I suppose) cannot use alist-ref to access those entries.
+;;
+(define (alist->artifact adat aspec #!key (ptype #f)(no-d #f))
+ (let* ((artifact-type (or ptype
+ (alist-ref 'T adat) ;; can provide in the incoming alist
+ #f))
+ (artifact-spec (if artifact-type ;; alist of external-key -> key
+ (or (alist-ref artifact-type aspec) '())
+ (if (null? aspec)
+ '()
+ (cdar aspec)))) ;; default to first one if nothing specified
+ (new-alist (map (lambda (dat)
+ (let* ((key (car dat))
+ (val (cdr dat))
+ (newkey (or (alist-ref key artifact-spec)
+ key)))
+ (cons newkey (escape-data (conc val))))) ;; convert all incoming data (symbols, numbers etc.) to a string and then escape newlines.
+ adat))
+ (new-with-type (if (alist-ref 'T new-alist)
+ new-alist
+ (cons `(T . ,artifact-type) new-alist)))
+ (with-d-card (if (or no-d ;; no timestamp wanted
+ (alist-ref 'D new-with-type))
+ new-with-type
+ (cons `(D . ,(current-seconds))
+ new-with-type))))
+ (add-z-card
+ (alist->sdat with-d-card))))
+
+;;======================================================================
+;; D B Q U E U E I N T E R F A C E
+;;======================================================================
+
+;; artifacts (
+;; id SERIAL PRIMARY KEY,
+;; uuid TEXT NOT NULL,
+;; parent_uuid TEXT default '',
+;; artifact_type INTEGER DEFAULT 0,
+;; group_id INTEGER NOT NULL,
+;; artifact TEXT NOT NULL
+
+;; schema is list of SQL statements - can be used to extend db with more tables
+;;
+(define (open-queue-db dbpath dbfile #!key (schema '()))
+ (let* ((dbfname (conc dbpath "/" dbfile))
+ (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f)))
+ (db (open-database dbfname)))
+ ;; (set-busy-handler! (dbi:db-conn db) (busy-timeout 10000))
+ (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness.
+ (for-each
+ (lambda (stmt)
+ (execute db stmt))
+ (cons "CREATE TABLE IF NOT EXISTS artifacts
+ (id INTEGER PRIMARY KEY,
+ group_id INTEGER NOT NULL,
+ uuid TEXT NOT NULL,
+ parent_uuid TEXT TEXT DEFAULT '',
+ artifact_type TEXT NOT NULL,
+ artifact TEXT NOT NULL,
+ processed INTEGER DEFAULT 0)"
+ schema))) ;; 0=not processed, 1=processed, 2... for expansion
+ db))
+
+(define (add-to-queue db artifact uuid artifact-type parent-uuid group-id)
+ (execute db "INSERT INTO artifacts (uuid,parent_uuid,artifact_type,artifact,group_id)
+ VALUES(?,?,?,?,?);" ;; $1,$2,$3,$4,$5);"
+ uuid
+ (if parent-uuid parent-uuid "");; use null string as placeholder for no parent uuid.
+ (if artifact-type (conc artifact-type) "")
+ artifact
+ group-id))
+
+;; given all needed parameters create a artifact and store it in the queue
+;; procs is an alist that maps artifact-type to a function that takes a list of artifact params
+;; in data and returns the uuid and artifact
+;;
+(define (create-and-queue conn procs artifact-type parent-uuid group-id data)
+ (let ((proc (alist-ref artifact-type procs)))
+ (if proc
+ (let-values (( (uuid artifact) (proc data) ))
+ (add-to-queue conn artifact uuid artifact-type parent-uuid group-id)
+ uuid)
+ #f)))
+
+;; given uuid get artifact, if group-id is specified use it (reduces probablity of
+;; being messed up by a uuid collision)
+;;
+(define (lookup-by-uuid db artifact-uuid group-id)
+ (if group-id
+ (get-one db "SELECT artifact FROM artifacts WHERE group_id=? AND uuid=?;" group-id artifact-uuid)
+ (get-one db "SELECT artifact FROM artifacts WHERE uuid=?;" artifact-uuid)))
+
+;; find a packet by its id
+;;
+(define (lookup-by-id db id)
+ (get-one db "SELECT artifact FROM artifacts WHERE id=?;" id))
+
+
+;;======================================================================
+;; P R O C E S S P K T S
+;;======================================================================
+
+;; given a list of field values pulled from the queue db generate a list
+;; of dartifact's
+;;
+(define (dblst->dartifacts lst . altmap)
+ (let* ((maplst (if (null? altmap)
+ '(id group-id uuid parent artifact-type artifact processed)
+ altmap))
+ (res (map cons maplst lst))) ;; produces list of pairs, i.e an alist
+ (cons `(aartifact . ,(artifact->alist (alist-ref 'artifact res)))
+ res)))
+
+;; NB// ptypes is a list of symbols, '() or #f find all types
+;;
+(define (get-dartifacts db ptypes group-id parent-uuid #!key (uuid #f))
+ (let* ((ptype-qry (if (and ptypes
+ (not (null? ptypes)))
+ (conc " IN ('" (string-intersperse (map conc ptypes) "','") "')")
+ (conc " LIKE '%' ")))
+ (rows (get-rows
+ db
+ (conc
+ "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
+ WHERE artifact_type " ptype-qry " AND group_id=?
+ AND processed=0 "
+ (if parent-uuid (conc "AND parent_uuid='" parent-uuid "' ") "")
+ (if uuid (conc "AND uuid='" uuid "' ") "")
+ "ORDER BY id DESC;")
+ group-id)))
+ (map dblst->dartifacts (map vector->list rows))))
+
+;; get N artifacts not yet processed for group-id
+;;
+(define (get-not-processed-artifacts db group-id artifact-type limit offset)
+ (map dblst->dartifacts
+ (map vector->list
+ (get-rows
+ db
+ "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
+ WHERE artifact_type = ? AND group_id = ? AND processed=0
+ LIMIT ? OFFSET ?;"
+ (conc artifact-type) ;; convert symbols to string
+ group-id
+ limit
+ offset
+ ))))
+
+;; given a uuid, get not processed child artifacts
+;;
+(define (get-related db group-id uuid)
+ (map dblst->dartifacts
+ (get-rows
+ db
+ "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
+ WHERE parent_uuid=? AND group_id=? AND processed=0;"
+ uuid group-id)))
+
+;; generic artifact processor
+;;
+;; find all packets in group-id of type in ptypes and apply proc to artifactdat
+;;
+(define (process-artifacts conn group-id ptypes parent-uuid proc)
+ (let* ((artifacts (get-dartifacts conn ptypes group-id parent-uuid)))
+ (map proc artifacts)))
+
+;; criteria is an alist ((k . valpatt) ...)
+;; - valpatt is a regex
+;; - ptypes is a list of types (symbols expected)
+;; match-type: 'any or 'all
+;;
+(define (find-artifacts db ptypes criteria #!key (processed #f)(match-type 'any)(artifact-spec #f)) ;; processed=#f, don't use, else use
+ (let* ((artifacts (get-dartifacts db ptypes 0 #f))
+ (match-rules (lambda (artifactdat) ;; returns a list of matching rules
+ (filter (lambda (c)
+ ;; (print "c: " c)
+ (let* ((ctype (car c)) ;; card type
+ (rx (cdr c)) ;; card pattern
+ ;; (t (alist-ref 'artifact-type artifactdat))
+ (artifact (alist-ref 'artifact artifactdat))
+ (aartifact (artifact->alist artifact))
+ (cdat (alist-ref ctype aartifact)))
+ ;; (print "cdat: " cdat) ;; " aartifact: " aartifact)
+ (if cdat
+ (string-match rx cdat)
+ #f)))
+ criteria)))
+ (res (filter (lambda (artifactdat)
+ (if (null? criteria) ;; looking for all artifacts
+ #t
+ (case match-type
+ ((any)(not (null? (match-rules artifactdat))))
+ ((all)(eq? (length (match-rules artifactdat))(length criteria)))
+ (else
+ (print "ERROR: bad match type " match-type ", expecting any or all.")))))
+ artifacts)))
+ (if artifact-spec
+ (dartifacts->alists res artifact-spec)
+ res)))
+
+;; get descendents of parent-uuid
+;;
+;; NOTE: Should be doing something like the following:
+;;
+;; given a uuid, get not processed child artifacts
+;; processed:
+;; #f => get all
+;; 0 => get not processed
+;; 1 => get processed
+;;
+(define (get-ancestors db group-id uuid #!key (processed #f))
+ (map dblst->dartifacts
+ (map vector->list
+ (get-rows
+ db
+ (conc
+ "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed
+ FROM artifacts
+ WHERE uuid IN
+ (WITH RECURSIVE
+ tree(uuid,parent_uuid)
+ AS
+ (
+ SELECT uuid, parent_uuid
+ FROM artifacts
+ WHERE uuid = ?
+ UNION ALL
+ SELECT t.uuid, t.parent_uuid
+ FROM artifacts t
+ JOIN tree ON t.uuid = tree.parent_uuid
+ )
+ SELECT uuid FROM tree)
+ AND group_id=?" (if processed (conc " AND processed=" processed) "") ";")
+ uuid group-id))))
+
+;; Untested
+;;
+(define (get-descendents db group-id uuid #!key (processed #f))
+ (map dblst->dartifacts
+ (map vector->list
+ (get-rows
+ db
+ (conc
+ "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed
+ FROM artifacts
+ WHERE uuid IN
+ (WITH RECURSIVE
+ tree(uuid,parent_uuid)
+ AS
+ (
+ SELECT uuid, parent_uuid
+ FROM artifacts
+ WHERE uuid = ?
+ UNION ALL
+ SELECT t.uuid, t.parent_uuid
+ FROM artifacts t
+ JOIN tree ON t.parent_uuid = tree.uuid
+ )
+ SELECT uuid FROM tree)
+ AND group_id=?" (if processed (conc " AND processed=" processed) "") ";")
+ uuid group-id))))
+
+;; look up descendents based on given info unless passed in a list via inlst
+;;
+;; (define (get-last-descendent db group-id uuid #!key (processed #f)(inlst #f))
+;; (let ((descendents (or inlst (get-descendents db group-id uuid processed: processed))))
+;; (if (null? descendents)
+;; #f
+;; (last descendents))))
+
+;;======================================================================
+;; A R C H I V E S - always to a sqlite3 db
+;;======================================================================
+
+;; open an archive db
+;; path: archive-dir//month.db
+;;
+#;(define (archive-open-db archive-dir)
+ (let* ((curr-time (seconds->local-time (current-seconds)))
+ (dbpath (conc archive-dir "/" (time->string curr-time "%Y")))
+ (dbfile (conc dbpath "/" (time->string curr-time "%m") ".db"))
+ (dbexists (if (file-exists? dbfile) #t (begin (create-directory dbpath #t) #f))))
+ (let ((db (open-database dbfile)))
+ ;; (set-busy-handler! db (busy-timeout 10000))
+ (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness.
+ (execute db "CREATE TABLE IF NOT EXISTS artifacts
+ (id INTEGER,
+ group_id INTEGER,
+ uuid TEXT,
+ parent_uuid TEXT,
+ artifact_type TEXT,
+ artifact TEXT,
+ processed INTEGER DEFAULT 0)"))
+ db)))
+
+;; turn on transactions! otherwise this will be painfully slow
+;;
+#;(define (write-archive-artifacts src-db db artifact-ids)
+ (let ((artifacts (get-rows
+ src-db
+ (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact FROM artifacts WHERE id IN ("
+ (string-intersperse (map conc artifact-ids) ",") ")"))))
+ ;; (dbi:with-transaction
+ ;; db
+ (lambda ()
+ (for-each
+ (lambda (artifact)
+ (apply execute db "INSERT INTO artifacts (id,group_id,uuid,parent_uuid,artifact_type,artifact)
+ VALUES (?,?,?,?,?,?)"
+ artifact))
+ artifacts)))) ;; )
+
+;; given a list of uuids and lists of uuids move all to
+;; the sqlite3 db for the current archive period
+;;
+#;(define (archive-artifacts conn artifact-ids archive-dir)
+ (let ((db (archive-open-db archive-dir)))
+ (write-archive-artifacts conn db artifact-ids)
+ (finalize! db))
+ ;; (pg:with-transaction
+ ;; conn
+ ;; (lambda ()
+ (for-each
+ (lambda (id)
+ (get-one
+ conn
+ "DELETE FROM artifacts WHERE id=?" id))
+ artifact-ids)) ;; ))
+
+;; given a list of ids mark all as processed
+;;
+(define (mark-processed conn artifact-ids)
+ ;; (pg:with-transaction
+ ;; conn
+ ;; (lambda ()
+ (for-each
+ (lambda (id)
+ (get-one
+ conn
+ "UPDATE artifacts SET processed=1 WHERE id=?;" id))
+ artifact-ids)) ;; x))
+
+;; a generic artifact getter, gets from the artifacts db
+;;
+(define (get-artifacts conn ptypes)
+ (let* ((ptypes-str (if (null? ptypes)
+ ""
+ (conc " WHERE artifact_type IN ('" (string-intersperse ptypes ",") "') ")))
+ (qry-str (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts" ptypes-str)))
+ (map vector->list (get-rows conn qry-str))))
+
+;; make a report of the artifacts in the db
+;; ptypes of '() gets all artifacts
+;; display-fields
+;;
+(define (make-report dest conn artifactspec display-fields . ptypes)
+ (let* (;; (conn (dbi:db-conn (s:db)))
+ (all-rows (get-artifacts conn ptypes))
+ (all-artifacts (flatten-all
+ all-rows
+ artifactspec
+ 'id 'group-id 'uuid 'parent 'artifact-type 'artifact 'processed))
+ (by-uuid (let ((ht (make-hash-table)))
+ (for-each
+ (lambda (artifact)
+ (let ((uuid (alist-ref 'uuid artifact)))
+ (hash-table-set! ht uuid artifact)))
+ all-artifacts)
+ ht))
+ (by-parent (let ((ht (make-hash-table)))
+ (for-each
+ (lambda (artifact)
+ (let ((parent (alist-ref 'parent artifact)))
+ (hash-table-set! ht parent (cons artifact (hash-table-ref/default ht parent '())))))
+ all-artifacts)
+ ht))
+ (oup (if dest (open-output-file dest) (current-output-port))))
+
+ (with-output-to-port
+ oup
+ (lambda ()
+ (print "digraph megatest_state_status {
+ // ranksep=0.05
+ rankdir=LR;
+ node [shape=\"box\"];
+")
+ ;; first all the names
+ (for-each
+ (lambda (artifact)
+ (let* ((uuid (alist-ref 'uuid artifact))
+ (shortuuid (substring uuid 0 4))
+ (type (alist-ref 'artifact-type artifact))
+ (processed (alist-ref 'processed artifact)))
+
+ (print "\"" uuid "\" [label=\"" shortuuid ", ("
+ type ", "
+ (if processed "processed" "not processed") ")")
+ (for-each
+ (lambda (key-field)
+ (let ((val (alist-ref key-field artifact)))
+ (if val
+ (print key-field "=" val))))
+ display-fields)
+ (print "\" ];")))
+ all-artifacts)
+ ;; now for parent-child relationships
+ (for-each
+ (lambda (artifact)
+ (let ((uuid (alist-ref 'uuid artifact))
+ (parent (alist-ref 'parent artifact)))
+ (if (not (equal? parent ""))
+ (print "\"" parent "\" -> \"" uuid"\";"))))
+ all-artifacts)
+
+ (print "}")
+ ))
+ (if dest
+ (begin
+ (close-output-port oup)
+ (system "dot -Tpdf out.dot -o out.pdf")))
+
+ ))
+
+;;======================================================================
+;; Read ref artifacts into a vector < laststr hash table >
+;;======================================================================
+
+
+
+;;======================================================================
+;; Read/write packets to files (convience functions)
+;;======================================================================
+
+;; write alist to a artifact file
+;;
+(define (write-alist->artifact targdir dat #!key (artifactspec '())(ptype #f))
+ (let-values (((uuid artifact)(alist->artifact dat artifactspec ptype: ptype)))
+ (with-output-to-file (conc targdir "/" uuid ".artifact")
+ (lambda ()
+ (print artifact)))
+ uuid)) ;; return the uuid
+
+;; read artifact into alist
+;;
+(define (read-artifact->alist artifact-file #!key (artifactspec #f))
+ (artifact->alist (with-input-from-file
+ artifact-file
+ read-string)
+ artifactspec: artifactspec))
+
+;;======================================================================
+;; File utils, stuff useful for file management
+;;======================================================================
+
+(define (file-get-sha1 fname)
+ (let* ((sha1-res (run/strings (sha1sum ,fname))))
+ (car (string-split (car sha1-res)))))
+
+(define (link-or-copy srcf destf)
+ (or (handle-exceptions
+ exn
+ #f
+ (file-link srcf destf))
+ (if (file-exists? destf)
+ (print "NOTE: destination already exists, skipping copy.")
+ (copy-file srcf destf))))
+
+;; (define (files-diff file1 file2)
+;; (let* ((diff-res (with-input-from-port
+;; (run/port (diff "-q" ,file1 ,file2))
+;; (lambda ()
+;; (let* ((res (read-line)))
+;; (read-lines)
+;; res)))))
+;; (car (string-split sha1-res))))
+;;
+
+
+(define (check-same file1 file2)
+ (cond
+ ((not (and (file-exists? file1)(file-exists? file2))) #f)
+ ((not (equal? (file-size file1)(file-size file2))) #f)
+ (else
+ (let-values (((status run-ok process-id)
+ (run (diff "-q" ,file1 ,file2))))
+ status))))
+
+(define *pcache* (make-hash-table))
+(define (get-device dir)
+ (let ((indat (or (hash-table-ref/default *pcache* dir #f)
+ (let* ((inp (open-input-pipe (conc "df --output=source \""dir"\"")))
+ (res (read-lines inp)))
+ (close-input-port inp)
+ (hash-table-set! *pcache* dir res)
+ res))))
+ (cadr indat)))
+
+(define (same-partition? dir1 dir2)
+ (equal? (get-device dir1)(get-device dir2)))
+
+(define (link-if-same-partition file1 file2)
+ (let* ((dir1 (pathname-directory file1))
+ (dir2 (pathname-directory file2))
+ (f1 (pathname-file file1))
+ (f2 (pathname-file file2)))
+ (if (same-partition? dir1 dir2)
+ (let* ((tmpname (conc "."f2"-"(current-seconds))))
+ ;; this steps needs to be executed as actual user
+ (move-file file2 (conc dir1 "/" tmpname))
+ (file-link file1 file2)
+ (delete-file (conc dir1 "/" tmpname))))))
+
+(define (uuid-first-two-letters sha1sum)
+ (substring sha1sum 0 2))
+
+(define (uuid-remaining-letters sha1sum)
+ (let ((slen (string-length sha1sum)))
+ (substring sha1sum 2 slen)))
+
+(define (archive-dest destd sha1sum)
+ (let* ((subdir (uuid-first-two-letters sha1sum)) ;; (substring sha1sum 0 2))
+ ;; (slen (string-length sha1sum))
+ (rem sha1sum #;(uuid-remaining-letters sha1sum)) ;; (substring sha1sum 3 slen))
+ (full-dest-dir (conc destd"/"subdir))
+ (full-dest-file (conc full-dest-dir"/"rem)))
+ (if (not (directory-exists? full-dest-dir))
+ (create-directory full-dest-dir #t))
+ full-dest-file))
+
+(define (write-to-archive data destd #!optional (nextnum #f))
+ (let* ((sha1sum (calc-sha1 data))
+ (full-dest (conc (archive-dest destd sha1sum)
+ (if nextnum (conc "."nextnum) ""))))
+ (if (file-exists? full-dest)
+ (if (equal? (string-intersperse (with-input-from-file full-dest read-lines) "\n")
+ data)
+ (begin
+ ;; (print "INFO: data already exists in "full-dest" and is identical")
+ sha1sum)
+ (let ((nextnum (if nextnum (+ nextnum 1) 0)))
+ (print "WARN: data already exists in "full-dest" but is different! Trying again...")
+ (write-to-archive data destd nextnum)))
+ (begin
+ (with-output-to-file
+ full-dest
+ (lambda ()
+ (print data)))
+ sha1sum)))) ;; BUG? Does print munge data?
+
+;; copy srcf with sha1sum aabc... to aa/bc...
+;;
+(define (archive-copy srcf destd sha1sum)
+ (let* ((full-dest-file (archive-dest destd sha1sum)))
+ (let loop ((trynum 0))
+ (let ((dest-name (if (> trynum 0)
+ (conc full-dest-file"-"trynum)
+ full-dest-file)))
+ (cond
+ ((not (file-exists? srcf)) #f) ;; this should be an error?
+ ((and (file-exists? srcf)
+ (file-exists? dest-name))
+ (if (check-same srcf dest-name)
+ (link-if-same-partition dest-name srcf)
+ (loop (+ trynum 1)))) ;; collisions are rare, this protects against them
+ ((not (file-exists? dest-name))
+ (link-or-copy srcf dest-name))
+ (else #f))))))
+
+;; multi-glob
+(define (multi-glob globstrs inpath)
+ ;; (print "multi-glob: "globstrs", "inpath)
+ (if (equal? inpath "")
+ globstrs
+ (let* ((parts (string-split inpath "/" #t))
+ (nextpart (car parts))
+ (remaining (string-intersperse (cdr parts) "/")))
+ (if (and (equal? nextpart "") ;; this must be a leading / meaning root directory
+ (null? globstrs))
+ (multi-glob '("/") remaining)
+ (begin
+ ;; (print "nextpart="nextpart", remaining="remaining)
+ (apply append
+ (map (lambda (gstr)
+ (let* ((pathstr (conc gstr"/"nextpart))
+ (pathstrs (glob pathstr)))
+ ;; (print "pathstr="pathstr)
+ (multi-glob pathstrs remaining)))
+ globstrs)))))))
+
+
+;; perm[/user:group]:
+;; DDD - octal perm (future expansion)
+;; - - use umask/defacto perms (i.e. don't actively do anything)
+;; x - mark as executable
+;;
+;; Cards:
+;; file: f perm fname
+;; directory: d perm fname artifactid
+;; link: l perm lname destpath
+;;
+;; NOTE: cards are kept as (C . "value")
+;;
+;; given a directory path, ignore list and artifact store (hash-table):
+;; 1. create sha1 tree at dest (e.g. aa/b3a7 ...)
+;; 2. create artifact for each dir
+;; - cards for all files
+;; - cards for files that are symlinks or executables
+;; 3. return (artifactid . artifact)
+;;
+;; NOTES:
+;; Use destdir of #f to not create sha1 tree
+;; Hard links will be used if srcdir and destdir appear to be same partion
+;;
+;; (alist->artifact adat aspec #!key (ptype #f))
+;;
+;;
+;; (load "../../artifacts/artifacts.scm")(import big-chicken srfi-69 artifacts)(define dirdat (make-hash-table))
+;; (capture-dir ".." ".." "/tmp/junk" '() dirdat)
+;;
+;; [procedure] (file-type FILE [LINK [ERROR]])
+;; Returns the file-type for FILE, which should be a filename, a file-descriptor or a port object. If LINK is given and true, symbolic-links are not followed:
+;;
+;; regular-file
+;; directory
+;; fifo
+;; socket
+;; symbolic-link
+;; character-device
+;; block-device
+;; Note that not all types are supported on every platform. If ERROR is given and false, then file-type returns #f if the file does not exist; otherwise, it signals an error.
+;;
+;;
+(define (capture-dir curr-dir src-dir dest-dir ignore-list artifacts all-seen)
+ (let* ((dir-dat (directory-fold
+ (lambda (fname res) ;; res is a list of artifact cards
+ (let* ((fullname (conc curr-dir"/"fname)))
+ ;; (print "INFO: processing "fullname)
+ (if (hash-table-ref/default all-seen fullname #f) ;; something circular going on
+ (begin
+ (print "WARNING: possible circular link(s) "fullname)
+ res)
+ (let* ((ftype (file-type fullname #t #f)))
+ (hash-table-set! all-seen fullname ftype)
+ (cons
+ (case ftype ;; get the card
+ ((directory) ;; (directory? fullname)
+ (let* ((new-curr-dir (conc curr-dir"/"fname))
+ (new-src-dir (conc src-dir"/"fname)))
+ (let* ((dir-dat (capture-dir new-curr-dir new-src-dir
+ dest-dir ignore-list artifacts all-seen))
+ (a-id (car dir-dat))
+ (artf (cdr dir-dat)))
+ (hash-table-set! artifacts a-id artf)
+ (cons 'd (conc "- "a-id" "fname))))) ;; the card
+ ((symbolic-link) ;; (symbolic-link? fullname)
+ (let ((ldest (read-symbolic-link fullname)))
+ (cons 'l (conc "- "fname"/"ldest)))) ;; delimit link name from dest with /
+ ((regular-file) ;; must be a file
+ (let* ((start (current-seconds))
+ (sha1sum (file-get-sha1 fullname))
+ (perms (if (file-executable? fullname) "x" "-")))
+ (let ((runtime (- (current-seconds) start)))
+ (if (> runtime 1)
+ (print "INFO: file "fullname" took "runtime" seconds to calculate sha1.")))
+ (if dest-dir
+ (archive-copy fullname dest-dir sha1sum))
+ (cons 'f (conc perms " "sha1sum" "fname))))
+ (else
+ (print "WARNING: file "fullname" of type "ftype" is NOT supported and will converted to empty file.")
+ (let* ((sha1sum (write-to-archive "" dest-dir)))
+ (cons 'f (conc "- "sha1sum" "fname)))))
+ res)))))
+ '() src-dir #:dotfiles? #t))) ;; => (values srcdir_artifact sub_artifacts_list)
+ ;; (print "dir-dat: " dir-dat)
+ (let-values (((a-id artf)
+ (alist->artifact dir-dat '() ptype: 'd no-d: #t)))
+ (hash-table-set! artifacts a-id artf)
+ (cons a-id artf))))
+
+;; maybe move this into artifacts?
+;;
+;; currently moves *.artifact into a bundle and moves the artifacts into attic
+;; future: move artifacts under 1 meg in size into bundle up to 10 meg in size
+;;
+(define (artifact-rollup bundle-dir) ;; cfg storepath)
+ ;; (let* ((bundle-dir (calc-bundle-dir cfg storepath)))
+ (let* ((bundles (glob (conc bundle-dir"/*.bundle")))
+ (artifacts (glob (conc bundle-dir"/*.artifact"))))
+ (if (> (length artifacts) 30) ;; rollup only if > 30 artifacts
+ ;; if we have unbundled artifacts, bundle them
+ (let* ((ht (read-artifacts-into-hash #f artifacts: artifacts))
+ (bundle (hash-of-artifacts->bundle ht)))
+ (write-bundle bundle bundle-dir)
+ (create-directory (conc bundle-dir"/attic") #t)
+ (for-each
+ (lambda (full-fname)
+ (let* ((fname (pathname-strip-directory full-fname))
+ (newname (conc bundle-dir"/attic/"fname)))
+ (move-file full-fname newname #t)))
+ artifacts)
+ (conc "bundled "(length artifacts)))
+ "not enough artifacts to bundle")))
+
+;; if destfile is a directory then calculate the sha1sum of the bundle and store it
+;; by .bundle
+;;
+;; incoming dat is pure text (bundle already sorted and appended:
+;;
+(define (write-bundle bdl-data destdir)
+ (let* ((bdl-uuid (calc-sha1 bdl-data)))
+ (with-output-to-file
+ (conc destdir"/"bdl-uuid".bundle")
+ (lambda ()
+ (print bdl-data)))))
+
+;; minimal (and hopefully fast) artifact reader
+;; TODO: Add check of shar sum.
+;;
+(define (minimal-artifact-read fname)
+ (let* ((indat (with-input-from-file fname read-lines)))
+ (if (null? indat)
+ (values #f (conc "did not find an artifact in "fname))
+ (let* ((zcard (last indat))
+ (cardk (substring zcard 0 1))
+ (cardv (substring zcard 2 (string-length zcard))))
+ (if (equal? cardk "Z")
+ (values cardv (string-intersperse indat "\n"))
+ (values #f (conc fname" is not a valid artifact")))))))
+
+;; read artifacts from directory into hash
+;; NOTE: support for max-count not implemented yet
+;;
+(define (read-artifacts-into-hash dir #!key (artifacts #f) (max-count #f)(ht #f))
+ (let* ((artifacts (or artifacts
+ (glob (conc dir"/*.artifact"))))
+ (ht (or ht (make-hash-table))))
+ (for-each
+ (lambda (fname)
+ (let-values (((uuid afct)
+ (minimal-artifact-read fname)))
+ (hash-table-set! ht uuid afct)))
+ artifacts)
+ ht))
+
+;; ht is:
+;; uuid => artifact text
+;; use write-bundle to put result into a bundle file
+;;
+(define (hash-of-artifacts->bundle ht)
+ (fold (lambda (k res)
+ (let* ((v (hash-table-ref ht k)))
+ (if res
+ (conc res"\n"v)
+ v)))
+ #f
+ (sort (hash-table-keys ht) string<=?)))
+
+;; minimal artifact to alist
+;;
+(define (minimal-artifact->alist afact)
+ (let* ((lines (string-split afact "\n")))
+ (map (lambda (a)
+ (let* ((key (string->symbol (substring a 0 1)))
+ (sl (string-length a))
+ (val (if (> sl 2)
+ (substring a 2 sl)
+ "")))
+ (cons key val)))
+ lines)))
+
+;; some accessors for common cards
+(define (afact-get-D afact)
+ (let ((dval (alist-ref 'D afact)))
+ (if dval
+ (string->number dval)
+ #f)))
+
+(define (afact-get-T afact) ;; get the artifact type as a symbol
+ (let ((val (alist-ref 'T afact)))
+ (if val
+ (string->symbol val)
+ val)))
+
+(define (afact-get-Z afact)
+ (alist-ref 'Z afact))
+
+(define (afact-get afact key default)
+ (or (alist-ref key afact)
+ default))
+
+(define (afact-get-number/default afact key default)
+ (let ((val (alist-ref key afact)))
+ (if val
+ (or (string->number val) default) ;; seems wrong
+ default)))
+
+;; bundles are never big and reading into memory for processing is fine
+;;
+(define (read-bundle srcfile #!optional (mode 'uuid-raw))
+ (let* ((indat (with-input-from-file srcfile read-lines)))
+ (let loop ((tail indat)
+ (dat '()) ;; artifact being extracted
+ (res '())) ;; list of artifacts
+ (if (null? tail)
+ (reverse res) ;; last dat should be empty list
+ (let* ((curr-line (car tail)))
+ (let-values (((ctype cdata)
+ (card->type/value curr-line)))
+ (let* ((is-z-card (eq? 'Z ctype))
+ (new-dat (cons (case mode
+ ((uuid-raw) curr-line)
+ (else (cons ctype cdata)))
+ dat)))
+ (if is-z-card
+ (loop (cdr tail) ;; done with this artifact
+ '()
+ (cons (case mode
+ ((uuid-raw) (cons cdata (string-intersperse (reverse new-dat) "\n")))
+ (else (reverse new-dat)))
+ res))
+ (loop (cdr tail)
+ new-dat
+ res)))))))))
+
+
+;; find all .bundle and .artifacts files in bundle-dir
+;; and inport them into sqlite handle adb
+;;
+(define (refresh-artifacts-db adb bundle-dir)
+ (let* ((bundles (glob (conc bundle-dir"/*.bundle")))
+ (artifacts (glob (conc bundle-dir"/*.artifact")))
+ (uuids (get-all-uuids adb 'hash)))
+ (with-transaction
+ adb
+ (lambda ()
+ (for-each
+ (lambda (bundle-file)
+ ;; (print "Importing artifacts from "bundle-file)
+ (let* ((bdat (read-bundle bundle-file 'uuid-raw))
+ (count 0)
+ (inc (lambda ()(set! count (+ count 1)))))
+ (for-each
+ (lambda (adat)
+ (match
+ adat
+ ((zval . artifact)
+ (if (not (hash-table-exists? uuids zval))
+ (begin
+ ;; (print "INFO: importing new artifact "zval" from bundle "bundle-file)
+ (inc)
+ (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);"
+ zval artifact)
+ (hash-table-set! uuids zval #t))))
+ (else
+ (print "ERROR: Bad artifact data "adat))))
+ bdat)
+ (print "INFO: imported "count" artifacts from "bundle-file)))
+ bundles)
+ (for-each
+ (lambda (artifact-file)
+ ;; (print "Importing artifact from "artifact-file)
+ (let-values (((uuid artifact) (minimal-artifact-read artifact-file)))
+ (if uuid
+ (if (not (hash-table-exists? uuids uuid))
+ (begin
+ ;; (print "INFO: importing new artifact "uuid" from "artifact-file)
+ (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);"
+ uuid artifact)
+ (hash-table-set! uuids uuid #t)))
+ (print "Bad artifact in "artifact-file))))
+ artifacts)))))
+
+;;======================================================================
+;; Artifacts db cache
+;;======================================================================
+
+;; artifacts
+;; id SERIAL PRIMARY KEY,
+;; uuid TEXT NOT NULL,
+;; artifact TEXT NOT NULL
+;;
+;; parents
+;; id INTEGER REFERENCES artids.id, --
+;; parent_id REFERENCES artids.id
+;;
+;; schema is list of SQL statements - can be used to extend db with more tables
+;;
+(define (open-artifacts-db dbpath dbfile #!key (schema '()))
+ (let* ((dbfname (conc dbpath "/" dbfile))
+ (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f)))
+ (adb (open-database dbfname)))
+ (set-busy-handler! adb (make-busy-timeout 10000))
+ (execute adb "PRAGMA synchronous = 0;")
+ (if (not dbexists)
+ (with-transaction
+ adb
+ (lambda ()
+ (for-each
+ (lambda (stmt)
+ (execute adb stmt))
+ (append `("CREATE TABLE IF NOT EXISTS artifacts
+ (id INTEGER PRIMARY KEY,
+ uuid TEXT NOT NULL,
+ artifact TEXT NOT NULL)"
+
+ "CREATE TABLE IF NOT EXISTS parents
+ (id INTEGER REFERENCES artifacts(id) NOT NULL,
+ parent_id INTEGER REFERENCES artifacts(id) NOT NULL)")
+ schema)))))
+ adb))
+
+(define (generate-year-month-name #!optional (seconds #f))
+ (let* ((curr-time (seconds->local-time (or seconds (current-seconds)))))
+ (time->string curr-time "%Y%m")))
+
+;; I don't like this function. TODO: remove the
+;; mode and option to return ht. Use instead the
+;; get-all-artifacts below
+;;
+(define (get-all-uuids adb #!optional (mode #f))
+ (let* ((res (fold-row
+ (lambda (res uuid)
+ (cons uuid res))
+ '()
+ adb
+ "SELECT uuid FROM artifacts;")))
+ (case mode
+ ((hash)
+ (let* ((ht (make-hash-table)))
+ (for-each
+ (lambda (uuid)
+ (hash-table-set! ht uuid #t))
+ res)
+ ht))
+ (else res))))
+
+;; returns raw artifacts (i.e. NOT alists but instead plain text)
+(define (get-all-artifacts adb)
+ (let* ((ht (make-hash-table)))
+ (for-each-row
+ (lambda (id uuid artifact)
+ (hash-table-set! ht uuid `(,id ,uuid ,artifact)))
+ adb
+ "SELECT id,uuid,artifact FROM artifacts;")
+ ht))
+
+;; given a bundle-dir copy or create to /tmp and open
+;; the YYMM.db file and hand the handle to the given proc
+;; NOTE: we operate in /tmp/ to accomodate users on NFS
+;; where slamming Unix locks at an NFS filer can cause
+;; locking fails. Eventually this /tmp behavior will be
+;; configurable.
+;;
+(define (with-todays-adb bundle-dir proc)
+ (let* ((dbname (conc (generate-year-month-name) ".db"))
+ (destname (conc bundle-dir"/"dbname))
+ (tmparea (conc "/tmp/"(current-user-name)"-"(calc-sha1 bundle-dir)))
+ (tmpname (conc tmparea"/"dbname))
+ (lockfile (conc destname".update-in-progress")))
+ ;; (print "with-todays-adb, bundle-dir: "bundle-dir", dbname: "dbname", destname: "destname",\n tmparea: " tmparea", lockfile: "lockfile)
+ (if (not (file-exists? tmparea))(create-directory tmparea #t))
+ (let loop ((count 0))
+ (if (file-exists? lockfile)
+ (if (< count 30) ;; aproximately 30 seconds
+ (begin
+ (sleep 1)
+ (loop (+ 1 count)))
+ (print "ERROR: "lockfile" exists, proceeding anyway"))
+ (if (file-exists? destname)
+ (begin
+ (copy-file destname tmpname #t)
+ (copy-file destname lockfile #t)))))
+ (let* ((adb (open-artifacts-db tmparea dbname))
+ (res (proc adb)))
+ (finalize! adb)
+ (copy-file tmpname destname #t)
+ (delete-file* lockfile)
+ res)))
+
+) ;; module artifacts
+
+;; ATTIC
+
ADDED artifacts/artifacts.setup
Index: artifacts/artifacts.setup
==================================================================
--- /dev/null
+++ artifacts/artifacts.setup
@@ -0,0 +1,11 @@
+;; Copyright 2007-2017, Matthew Welland.
+;;
+;; This program is made available under the GNU GPL version 2.0 or
+;; greater. See the accompanying file COPYING for details.
+;;
+;; This program is distributed WITHOUT ANY WARRANTY; without even the
+;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;; PURPOSE.
+
+;;;; pkts.setup
+(standard-extension 'pkts "1.0")
ADDED artifacts/artifactsrec.scm
Index: artifacts/artifactsrec.scm
==================================================================
--- /dev/null
+++ artifacts/artifactsrec.scm
@@ -0,0 +1,196 @@
+(define-syntax define-record-type
+ (syntax-rules ()
+ ((define-record-type type
+ (constructor constructor-tag ...)
+ predicate
+ (field-tag accessor . more) ...)
+ (begin
+ (define type
+ (make-record-type 'type '(field-tag ...)))
+ (define constructor
+ (record-constructor type '(constructor-tag ...)))
+ (define predicate
+ (record-predicate type))
+ (define-record-field type field-tag accessor . more)
+ ...))))
+
+; An auxilliary macro for define field accessors and modifiers.
+; This is needed only because modifiers are optional.
+
+(define-syntax define-record-field
+ (syntax-rules ()
+ ((define-record-field type field-tag accessor)
+ (define accessor (record-accessor type 'field-tag)))
+ ((define-record-field type field-tag accessor modifier)
+ (begin
+ (define accessor (record-accessor type 'field-tag))
+ (define modifier (record-modifier type 'field-tag))))))
+
+; Record types
+
+; We define the following procedures:
+;
+; (make-record-type ) ->
+; (record-constructor ) ->
+; (record-predicate ) ->
+; (record-accessor ) ->
+; (record-modifier ) ->
+; where
+; ( ...) ->
+; ( ) ->
+; ( ) ->
+; ( ) ->
+
+; Record types are implemented using vector-like records. The first
+; slot of each record contains the record's type, which is itself a
+; record.
+
+(define (record-type record)
+ (record-ref record 0))
+
+;----------------
+; Record types are themselves records, so we first define the type for
+; them. Except for problems with circularities, this could be defined as:
+; (define-record-type :record-type
+; (make-record-type name field-tags)
+; record-type?
+; (name record-type-name)
+; (field-tags record-type-field-tags))
+; As it is, we need to define everything by hand.
+
+(define :record-type (make-record 3))
+(record-set! :record-type 0 :record-type) ; Its type is itself.
+(record-set! :record-type 1 ':record-type)
+(record-set! :record-type 2 '(name field-tags))
+
+; Now that :record-type exists we can define a procedure for making more
+; record types.
+
+(define (make-record-type name field-tags)
+ (let ((new (make-record 3)))
+ (record-set! new 0 :record-type)
+ (record-set! new 1 name)
+ (record-set! new 2 field-tags)
+ new))
+
+; Accessors for record types.
+
+(define (record-type-name record-type)
+ (record-ref record-type 1))
+
+(define (record-type-field-tags record-type)
+ (record-ref record-type 2))
+
+;----------------
+; A utility for getting the offset of a field within a record.
+
+(define (field-index type tag)
+ (let loop ((i 1) (tags (record-type-field-tags type)))
+ (cond ((null? tags)
+ (error "record type has no such field" type tag))
+ ((eq? tag (car tags))
+ i)
+ (else
+ (loop (+ i 1) (cdr tags))))))
+
+;----------------
+; Now we are ready to define RECORD-CONSTRUCTOR and the rest of the
+; procedures used by the macro expansion of DEFINE-RECORD-TYPE.
+
+(define (record-constructor type tags)
+ (let ((size (length (record-type-field-tags type)))
+ (arg-count (length tags))
+ (indexes (map (lambda (tag)
+ (field-index type tag))
+ tags)))
+ (lambda args
+ (if (= (length args)
+ arg-count)
+ (let ((new (make-record (+ size 1))))
+ (record-set! new 0 type)
+ (for-each (lambda (arg i)
+ (record-set! new i arg))
+ args
+ indexes)
+ new)
+ (error "wrong number of arguments to constructor" type args)))))
+
+(define (record-predicate type)
+ (lambda (thing)
+ (and (record? thing)
+ (eq? (record-type thing)
+ type))))
+
+(define (record-accessor type tag)
+ (let ((index (field-index type tag)))
+ (lambda (thing)
+ (if (and (record? thing)
+ (eq? (record-type thing)
+ type))
+ (record-ref thing index)
+ (error "accessor applied to bad value" type tag thing)))))
+
+(define (record-modifier type tag)
+ (let ((index (field-index type tag)))
+ (lambda (thing value)
+ (if (and (record? thing)
+ (eq? (record-type thing)
+ type))
+ (record-set! thing index value)
+ (error "modifier applied to bad value" type tag thing)))))
+
+Records
+
+; This implements a record abstraction that is identical to vectors,
+; except that they are not vectors (VECTOR? returns false when given a
+; record and RECORD? returns false when given a vector). The following
+; procedures are provided:
+; (record? ) ->
+; (make-record ) ->
+; (record-ref ) ->
+; (record-set! ) ->
+;
+; These can implemented in R5RS Scheme as vectors with a distinguishing
+; value at index zero, providing VECTOR? is redefined to be a procedure
+; that returns false if its argument contains the distinguishing record
+; value. EVAL is also redefined to use the new value of VECTOR?.
+
+; Define the marker and redefine VECTOR? and EVAL.
+
+(define record-marker (list 'record-marker))
+
+(define real-vector? vector?)
+
+(define (vector? x)
+ (and (real-vector? x)
+ (or (= 0 (vector-length x))
+ (not (eq? (vector-ref x 0)
+ record-marker)))))
+
+; This won't work if ENV is the interaction environment and someone has
+; redefined LAMBDA there.
+
+(define eval
+ (let ((real-eval eval))
+ (lambda (exp env)
+ ((real-eval `(lambda (vector?) ,exp))
+ vector?))))
+
+; Definitions of the record procedures.
+
+(define (record? x)
+ (and (real-vector? x)
+ (< 0 (vector-length x))
+ (eq? (vector-ref x 0)
+ record-marker)))
+
+(define (make-record size)
+ (let ((new (make-vector (+ size 1))))
+ (vector-set! new 0 record-marker)
+ new))
+
+(define (record-ref record index)
+ (vector-ref record (+ index 1)))
+
+(define (record-set! record index value)
+ (vector-set! record (+ index 1) value))
ADDED artifacts/tests/run.scm
Index: artifacts/tests/run.scm
==================================================================
--- /dev/null
+++ artifacts/tests/run.scm
@@ -0,0 +1,139 @@
+(use test)
+
+;; (use (prefix pkts pkts:))
+(use pkts (prefix dbi dbi:))
+;; (use trace)(trace sdat->alist pkt->alist)
+
+(if (file-exists? "queue.db")(delete-file "queue.db"))
+
+(test-begin "pkts and pkt archives")
+
+;;======================================================================
+;; Basic pkt creation, parsing and conversion routines
+;;======================================================================
+
+(test-begin "basic packets")
+(test #f '(A "This is a packet") (let-values (((t v)
+ (card->type/value "A This is a packet")))
+ (list t v)))
+(test #f "A A\nZ 664449e7299e0065a3e25c138ccef2df13ba291e"
+ (let-values (((uuid res)
+ (add-z-card '("A A"))))
+ res))
+(test #f '("CC C++" "D 1486332719.0" "a A" "b C")(sort (construct-sdat 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0)
+ string<=?))
+(define pkt-example #f)
+(test #f "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"
+ (let-values (((uuid res)
+ (construct-pkt 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0)))
+ (set! pkt-example (cons uuid res))
+ res))
+(test-end "basic packets")
+
+;;======================================================================
+;; Sqlite and postgresql based queue of pkts
+;;======================================================================
+
+(test-begin "pkt queue")
+(define db #f)
+(test #f 'sqlite3 (let ((dbh (open-queue-db "." "queue.db")))
+ (set! db dbh)
+ (dbi:db-dbtype dbh)))
+(test #f (cdr pkt-example)
+ (begin
+ (add-to-queue db (cdr pkt-example)(car pkt-example) 'basic #f 0)
+ (lookup-by-uuid db (car pkt-example) 0)))
+(test #f (cdr pkt-example)
+ (lookup-by-id db 1))
+(test #f 1 (length (find-pkts db '(basic) '())))
+
+(test-end "pkt queue")
+
+
+;;======================================================================
+;; Process groups of pkts
+;;======================================================================
+
+(test-begin "lists of packets")
+(test #f '((apkt . #f) (id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) ;; ((id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5))
+ (dblst->dpkts '(1 2 3 4 5)))
+(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0)))
+ ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
+ ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
+ (get-dpkts db '(basic) 0 #f))
+(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0)))
+ ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
+ ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
+ (get-not-processed-pkts db 0 'basic 1000 0))
+(test-end "lists of packets")
+
+(test-begin "pkts as alists")
+(define pktspec '((posting . ((title . t) ;; NOTE: same as ((posting (title . t)(url . u)(blub . b)) ...
+ (url . u)
+ (blurb . b)))
+ (comment . ((comment . c)
+ (score . s)))
+ (basic . ((b-field . b)
+ (a-field . a)))))
+(define pktlst (find-pkts db '(basic) '()))
+(define dpkt (car pktlst))
+(test #f "A" (get-value 'a-field dpkt pktspec))
+
+(test #f "C" (alist-ref 'b-field (dpkt->alist dpkt pktspec)))
+
+(define basic-spec '((nada (foo . b)(bar . f))(basic (foo . f)(bar . b))))
+(define test-pkt '((foo . "fooval")(bar . "barval")))
+(let*-values (((u p) (alist->pkt test-pkt basic-spec ptype: 'basic))
+ ((apkt) (pkt->alist p))
+ ((bpkt) (pkt->alist p pktspec: basic-spec)))
+ (test #f "fooval" (alist-ref 'f apkt))
+ (test #f "fooval" (alist-ref 'foo bpkt))
+ (test #f #f (alist-ref 'f bpkt)))
+
+(test-end "pkts as alists")
+
+(test-begin "descendents and ancestors")
+
+(define (get-uuid pkt)(alist-ref 'uuid pkt))
+
+;; add a child to 263e
+(let-values (((uuid pkt)
+ (construct-pkt 'x "X" 'y "Y" 'P "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"
+ 'D "1486332719.0")))
+ (add-to-queue db pkt uuid 'basic "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" 0))
+
+(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8")
+ (map (lambda (x)(alist-ref 'uuid x))
+ (get-descendents
+ db 0
+ "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
+
+(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8")
+ (map (lambda (x)(alist-ref 'uuid x))
+ (get-ancestors
+ db 0
+ "818fe30988c9673441b8f203972a8bda6af682f8")))
+
+(test-end "descendents and ancestors")
+
+(test-end "pkts and pkt archives")
+
+(test-begin "pktsdb")
+
+(define spec '((tests (testname n TEXT)
+ (testpath p TEXT)
+ (duration d INTEGER))))
+;; (define pktsdb (make-pktdb))
+;; (pktdb-pktsdb-spec-set! pktsdb spec)
+
+(define pktsdb #f)
+
+(test #f #t (dbi:database? (let ((pdb (pktsdb-open "test.db" spec)))
+ (set! pktsdb pdb)
+ (pktdb-conn pdb))))
+;; (pp (pktdb-pktspec pktsdb))
+(test #f #t (pktsdb-add-record pktsdb 'tests '((testname . "test1"))))
+
+(pktsdb-close pktsdb)
+
+(test-end "pktsdb")
Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -27,10 +27,19 @@
(declare (unit client))
(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
+(declare (uses commonmod))
+(import commonmod)
+
+(module client
+*
+
+)
+
+(import client)
(include "common_records.scm")
(include "db_records.scm")
;; client:get-signature
@@ -44,13 +53,10 @@
#;(define (client:logout serverdat)
(let ((ok (and (socket? serverdat)
(cdb:logout serverdat *toppath* (client:get-signature)))))
ok))
-(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
- (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
-
;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios.
;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline
@@ -60,17 +66,27 @@
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
-(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
+;;(define (http-transport:server-dat-make-url runremote)
+(define (client:get-url runremote)
+ (if (and (remote-iface runremote)
+ (remote-port runremote))
+ (conc "http://"
+ (remote-iface runremote)
+ ":"
+ (remote-port runremote))
+ #f))
+
+(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
(mutex-lock! *rmt-mutex*)
- (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
+ (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
(mutex-unlock! *rmt-mutex*)
res))
-(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
+(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0))
(debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
(server:start-and-wait areapath)
(if (<= remaining-tries 0)
(begin
(debug:print-error 0 *default-log-port* "failed to start or connect to server")
@@ -77,52 +93,72 @@
(exit 1))
;;
;; Alternatively here, we can get the list of candidate servers and work our way
;; through them searching for a good one.
;;
- (let* ((server-dat (server:choose-server areapath 'best))
- (runremote (or area-dat *runremote*)))
+ (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid
+;; (runremote (or area-dat *runremote*)))
(if (not server-dat) ;; no server found
- (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
+ (begin
+ (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time
+ (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
(match server-dat
((host port start-time server-id pid)
(debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
- (if (and (not area-dat)
- (not *runremote*))
- (begin
- (set! *runremote* (make-remote))
- (let* ((server-info (remote-server-info *runremote*)))
+ (if (not runremote)
+ (begin
+ ;; Here we are creating a runremote where there was none or it was clobbered with #f
+ ;;
+ (set! runremote (make-and-init-remote))
+ (let* ((server-info (server:check-if-running areapath)))
+ (remote-server-info-set! runremote server-info)
(if server-info
(begin
- (remote-server-url-set! *runremote* (server:record->url server-info))
- (remote-server-id-set! *runremote* (server:record->id server-info)))))))
+ (remote-server-url-set! runremote (server:record->url server-info))
+ (remote-server-id-set! runremote (server:record->id server-info)))))))
+ ;; at this point we have a runremote
(if (and host port server-id)
- (let* ((start-res (http-transport:client-connect host port server-id))
- (ping-res (rmt:login-no-auto-client-setup start-res)))
- (if (and start-res
- ping-res)
- (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
- (if runremote
- (begin
- (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
- (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
- start-res)
- (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
+ (let* ((nada (client:connect host port server-id runremote))
+ (ping-res (rmt:login-no-auto-client-setup runremote)))
+ (if ping-res
+ (if runremote
+ (begin
+ (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote))
+ runremote)
+ (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
(begin ;; login failed but have a server record, clean out the record and try again
- (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
- (case *transport-type*
- ((http)(http-transport:close-connections)))
- (if *runremote*
- (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id)
- )
+ (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332
+ (http-transport:close-connections runremote)
(thread-sleep! 1)
- (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
+ (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))
)))
(begin ;; no server registered
;; (server:kind-run areapath)
(server:start-and-wait areapath)
(debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
(thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
- (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))
+ (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))))
(else
(debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))
+;;
+;; connect - stored in remote-condat
+;;
+;; (define (http-transport:client-connect iface port server-id runremote)
+(define (client:connect iface port server-id runremote-in)
+ (let* ((runremote (or runremote-in
+ (make-runremote))))
+ (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
+ (let* ((api-url (conc "http://" iface ":" port "/api"))
+ (api-uri (uri-reference (conc "http://" iface ":" port "/api")))
+ (api-req (make-request method: 'POST uri: api-uri)))
+ ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
+ (remote-iface-set! runremote iface)
+ (remote-port-set! runremote port)
+ (remote-server-id-set! runremote server-id)
+ (remote-connect-time-set! runremote (current-seconds))
+ (remote-last-access-set! runremote (current-seconds))
+ (remote-api-url-set! runremote api-url)
+ (remote-api-uri-set! runremote api-uri)
+ (remote-api-req-set! runremote api-req)
+ runremote)))
+
Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -24,10 +24,12 @@
matchable regex posix (srfi 18) extras ;; tcp
(prefix nanomsg nmsg:)
(prefix sqlite3 sqlite3:)
pkts (prefix dbi dbi:)
)
+(use posix-extras pathname-expand files)
+
(declare (unit common))
(declare (uses commonmod))
(import commonmod)
@@ -210,12 +212,10 @@
;; Miscellaneous
(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers
(define *numcpus-cache* (make-hash-table))
-(use posix-extras pathname-expand files)
-
;; this plugs a hole in posix-extras in recent chicken versions > 4.9)
(let-values (( (chicken-release-number chicken-major-version)
(apply values
(map string->number
(take
@@ -249,33 +249,10 @@
(define (common:get-sync-lock-filepath)
(let* ((tmp-area (common:get-db-tmp-area))
(lockfile (conc tmp-area "/megatest.db.sync-lock")))
lockfile))
-;;======================================================================
-;; when called from a wrapper I need sometimes to find the calling
-;; wrapper, this is for dashboard to find the correct megatest.
-;;
-(define (common:find-local-megatest #!optional (progname "megatest"))
- (let ((res (filter file-exists?
- (map (lambda (updir)
- (let* ((lm (car (argv)))
- (dir (pathname-directory lm))
- (exe (pathname-strip-directory lm)))
- (conc (if dir (conc dir "/") "")
- (case (string->symbol exe)
- ((dboard) (conc updir progname))
- ((mtest) (conc updir progname))
- ((dashboard) progname)
- (else exe)))))
- '("../../" "../")))))
- (if (null? res)
- (begin
- (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
- progname)
- (car res))))
-
(define *common:logpro-exit-code->status-sym-alist*
'( ( 0 . pass )
( 1 . fail )
( 2 . warn )
( 3 . check )
@@ -315,26 +292,39 @@
(else "FAIL")))
(define (common:logpro-exit-code->test-status exit-code)
(status-sym->string (common:logpro-exit-code->status-sym exit-code)))
+;;
(defstruct remote
+
+ ;; transport to be used
+ ;; http - use http-transport
+ ;; http-read-cached - use http-transport for writes but in-mem cached for reads
+ (rmode 'http)
(hh-dat (let ((res (or (server:choose-server *toppath* 'homehost)
(cons #f #f))))
(assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res))
res))
(server-url #f) ;; (server:check-if-running *toppath*) #f))
(server-id #f)
- (server-info (if *toppath* (server:check-if-running *toppath*) #f))
+ (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f))
(last-server-check 0) ;; last time we checked to see if the server was alive
- (connect-time (current-seconds))
- (conndat #f)
- (transport *transport-type*)
+ (connect-time (current-seconds)) ;; when we first connected
+ (last-access (current-seconds)) ;; last time we talked to server
+ ;; (conndat #f) ;; iface port api-uri api-url api-req seconds server-id
(server-timeout (server:expiration-timeout))
(force-server #f)
(ro-mode #f)
- (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode
+ (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode
+
+ ;; conndat stuff
+ (iface #f) ;; TODO: Consolidate this data with server-url and server-info above
+ (port #f)
+ (api-url #f)
+ (api-uri #f)
+ (api-req #f))
;; launching and hosts
(defstruct host
(reachable #f)
(last-update 0)
@@ -408,27 +398,48 @@
(define (common:version-changed?)
(not (equal? (common:get-last-run-version)
(common:version-signature))))
+
+;; From 1.70 to 1.80, db's are compatible.
+
(define (common:api-changed?)
- (not (equal? (substring (->string megatest-version) 0 4)
- (substring (conc (common:get-last-run-version)) 0 4))))
+ (let* (
+ (megatest-major-version (substring (->string megatest-version) 0 4))
+ (run-major-version (substring (conc (common:get-last-run-version)) 0 4))
+ )
+ (and (not (equal? megatest-major-version "1.80"))
+ (not (equal? megatest-major-version megatest-run-version)))
+ )
+)
;;======================================================================
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
- (apply db:multi-db-sync
- dbstruct
- 'schema
- 'killservers
- 'adj-target
- 'new2old
- '(dejunk)
- )
+ (case (rmt:transport-mode)
+ ((http)
+ (apply db:multi-db-sync
+ dbstruct
+ 'schema
+ 'killservers
+ 'adj-target
+ 'new2old
+ '(dejunk)
+ ))
+ ((tcp nfs)
+ (debug:print 0 *default-log-port* "WARNING: cleanup-db NOT implemented yet for tcp and nfs.")
+ #;(apply db:multi-db-sync
+ dbstruct
+ 'schema
+ 'killservers
+ 'adj-target
+ 'new2old
+ '(dejunk)
+ )))
(if (common:api-changed?)
(common:set-last-run-version)))
(define (common:snapshot-file filepath #!key (subdir ".") )
(if (file-exists? filepath)
@@ -520,11 +531,11 @@
(define (common:rotate-logs)
(let* ((all-files (make-hash-table))
(stats (make-hash-table))
(inc-stat (lambda (key)
(hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1))))
- (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
+ (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age
(if (not (directory-exists? "logs"))(create-directory "logs"))
(directory-fold
(lambda (file rem)
(handle-exceptions
exn
@@ -599,14 +610,15 @@
;;======================================================================
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
- (if (common:on-homehost?)
+ (if (and *toppath* ;; do nothing if *toppath* not yet provided
+ (common:on-homehost?))
(if (common:api-changed?)
(let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
- (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
+ (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".megatest/main.db"))
(read-only (not (file-write-access? dbfile)))
(dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t)))
(debug:print 0 *default-log-port*
"WARNING: Version mismatch!\n"
" expected: " (common:version-signature) "\n"
@@ -626,14 +638,14 @@
(common:cleanup-db dbstruct)))
((not (common:file-exists? mtconf))
(debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (common:file-exists? dbfile))
- (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.")
+ (debug:print 0 *default-log-port* " .megatest/main.db does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (eq? (current-user-id)(file-owner mtconf)))
- (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.")
+ (debug:print 0 *default-log-port* " You do not own .megatest/main.db in this area. Cannot proceed with megatest version migration.")
(exit 1))
(read-only
(debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.")
(exit 1))
(else
@@ -710,22 +722,21 @@
""))))
(define (common:alist-ref/default key alist default)
(or (alist-ref key alist) default))
-(define (common:low-noise-print waitval . keys)
- (let* ((key (string-intersperse (map conc keys) "-" ))
- (lasttime (hash-table-ref/default *common:denoise* key 0))
- (currtime (current-seconds)))
- (if (> (- currtime lasttime) waitval)
- (begin
- (hash-table-set! *common:denoise* key currtime)
- #t)
- #f)))
-
-(define (common:get-megatest-exe)
- (or (getenv "MT_MEGATEST") "megatest"))
+;; moved into commonmod
+;;
+;; (define (common:low-noise-print waitval . keys)
+;; (let* ((key (string-intersperse (map conc keys) "-" ))
+;; (lasttime (hash-table-ref/default *common:denoise* key 0))
+;; (currtime (current-seconds)))
+;; (if (> (- currtime lasttime) waitval)
+;; (begin
+;; (hash-table-set! *common:denoise* key currtime)
+;; #t)
+;; #f)))
(define (common:read-encoded-string instr)
(handle-exceptions
exn
(handle-exceptions
@@ -946,20 +957,21 @@
(handle-exceptions
exn
(begin
(debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn)
(exit 1))
- (let* ((tsname (common:get-testsuite-name))
+ (let* ((toppath (common:real-path *toppath*))
+ (tsname (common:get-testsuite-name))
(dbpath (common:get-create-writeable-dir
(list (conc "/tmp/" (current-user-name)
"/megatest_localdb/"
tsname "/"
- (string-translate *toppath* "/" "."))
+ (string-translate toppath "/" "."))
(conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
"/megatest_localdb/"
tsname
- (string-translate *toppath* "/" "."))
+ (string-translate toppath "/" "."))
))))
(set! *db-cache-path* dbpath)
;; ensure megatest area has .megatest
(let ((dbarea (conc *toppath* "/.megatest")))
(if (not (file-exists? dbarea))
@@ -972,23 +984,18 @@
#f)))
(define (common:get-area-path-signature)
(message-digest-string (md5-primitive) *toppath*))
-(define (common:get-signature str)
- (message-digest-string (md5-primitive) str))
-
;;======================================================================
;; E X I T H A N D L I N G
;;======================================================================
(define (common:run-sync?)
- (and (common:on-homehost?)
- (args:get-arg "-server")))
-
-(define (common:human-time)
- (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
+ (and *toppath* ;; gate if called before *toppath* is set
+ (common:on-homehost?)
+ (args:get-arg "-server")))
(define (std-signal-handler signum)
;; (signal-mask! signum)
(set! *time-to-exit* #t)
@@ -1048,26 +1055,10 @@
(define (common:get-disks #!key (configf #f))
(hash-table-ref/default
(or configf (read-config "megatest.config" #f #t))
"disks" '("none" "")))
-;;======================================================================
-;; return first command that exists, else #f
-;;
-(define (common:which cmds)
- (if (null? cmds)
- #f
- (let loop ((hed (car cmds))
- (tal (cdr cmds)))
- (let ((res (with-input-from-pipe (conc "which " hed) read-line)))
- (if (and (string? res)
- (common:file-exists? res))
- res
- (if (null? tal)
- #f
- (loop (car tal)(cdr tal))))))))
-
(define (common:get-install-area)
(let ((exe-path (car (argv))))
(if (common:file-exists? exe-path)
(handle-exceptions
exn
@@ -1345,11 +1336,11 @@
(else
(debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.")
#t)))) ;; default to requiring server
(if force-result
(begin
- (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".")
+ (debug:print-info 0 *default-log-port* "ATTENTION! Forcing use of server, force setting is \"" force-setting "\".")
#t)
#f)))
;;======================================================================
;; M I S C L I S T S
@@ -1598,10 +1589,30 @@
path) ;; just give up
(with-input-from-pipe
(conc "/bin/readlink -f " path)
(lambda ()
(read-line)))))
+
+;; for reasons I don't understand multiple calls to real-path in parallel threads
+;; must be protected by mutexes
+;;
+(define (common:real-path inpath)
+ ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params)
+ ;; (let-values
+ ;; (((inp oup pid) (process "readlink" (list "-f" inpath))))
+ ;; (with-input-from-port inp
+ ;; (let loop ((inl (read-line))
+ ;; (res #f))
+ ;; (print "inl=" inl)
+ ;; (if (eof-object? inl)
+ ;; (begin
+ ;; (close-input-port inp)
+ ;; (close-output-port oup)
+ ;; ;; (process-wait pid)
+ ;; res)
+ ;; (loop (read-line) inl))))))
+ (with-input-from-pipe (conc "readlink -f " inpath) read-line))
;;======================================================================
;; returns *effective load* (not normalized)
;;
(define (common:get-intercept onemin fivemin)
@@ -1989,16 +2000,27 @@
(host-last-used-set! rec curr-time)
new-best)
(if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))
(define (common:wait-for-homehost-load maxnormload msg)
- (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
- #f
- (server:choose-server *toppath* 'homehost)))
- (hh (if hh-dat (car hh-dat) #f)))
- (common:wait-for-normalized-load maxnormload msg hh)))
-
+ (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test...
+ (if (not *toppath*)
+ (begin
+ (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.")
+ (thread-sleep! 30)
+ (if (< (- (current-seconds) start-time) 300)
+ (loop start-time)))))
+ (case (rmt:transport-mode)
+ ((http)
+ (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
+ #f
+ (server:choose-server *toppath* 'homehost)))
+ (hh (if hh-dat (car hh-dat) #f)))
+ (common:wait-for-normalized-load maxnormload msg hh)))
+ (else
+ (common:wait-for-normalized-load maxnormload msg (get-host-name)))))
+
(define (common:get-num-cpus remote-host)
(let* ((actual-host (or remote-host (get-host-name))))
;; hosts had better not be changing the number of cpus too often!
(or (hash-table-ref/default *numcpus-cache* actual-host #f)
(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
@@ -2218,30 +2240,10 @@
(uname #f))
(if (null? (car uname-res))
"unknown"
(caar uname-res))))
-;; for reasons I don't understand multiple calls to real-path in parallel threads
-;; must be protected by mutexes
-;;
-(define (common:real-path inpath)
- ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params)
- ;; (let-values
- ;; (((inp oup pid) (process "readlink" (list "-f" inpath))))
- ;; (with-input-from-port inp
- ;; (let loop ((inl (read-line))
- ;; (res #f))
- ;; (print "inl=" inl)
- ;; (if (eof-object? inl)
- ;; (begin
- ;; (close-input-port inp)
- ;; (close-output-port oup)
- ;; ;; (process-wait pid)
- ;; res)
- ;; (loop (read-line) inl))))))
- (with-input-from-pipe (conc "readlink -f " inpath) read-line))
-
;;======================================================================
;; D I S K S P A C E
;;======================================================================
(define (common:get-disk-space-used fpath)
@@ -2619,291 +2621,10 @@
(cond
(with-vars (common:without-vars fullcmd))
(with-orig-env (common:with-orig-env fullcmd))
(else (common:without-vars fullcmd "MT_.*")))))
-;;======================================================================
-;; T I M E A N D D A T E
-;;======================================================================
-
-;;======================================================================
-;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
-(define (common:hms-string->seconds tstr)
- (let ((parts (string-split-fields "\\w+" tstr))
- (time-secs 0)
- ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
- (trx (regexp "(\\d+)([smhdMyw])")))
- (for-each (lambda (part)
- (let ((match (string-match trx part)))
- (if match
- (let ((val (string->number (cadr match)))
- (unt (caddr match)))
- (if val
- (set! time-secs (+ time-secs (* val
- (case (string->symbol unt)
- ((s) 1)
- ((m) 60) ;; minutes
- ((h) 3600)
- ((d) 86400)
- ((w) 604800)
- ((M) 2628000) ;; aproximately one month
- ((y) 31536000)
- (else #f))))))))))
- parts)
- time-secs))
-
-(define (seconds->hr-min-sec secs)
- (let* ((hrs (quotient secs 3600))
- (min (quotient (- secs (* hrs 3600)) 60))
- (sec (- secs (* hrs 3600)(* min 60))))
- (conc (if (> hrs 0)(conc hrs "hr ") "")
- (if (> min 0)(conc min "m ") "")
- sec "s")))
-
-(define (seconds->time-string sec)
- (time->string
- (seconds->local-time sec) "%H:%M:%S"))
-
-(define (seconds->work-week/day-time sec)
- (time->string
- (seconds->local-time sec) "ww%V.%u %H:%M"))
-
-(define (seconds->work-week/day sec)
- (time->string
- (seconds->local-time sec) "ww%V.%u"))
-
-(define (seconds->year-work-week/day sec)
- (time->string
- (seconds->local-time sec) "%yww%V.%w"))
-
-(define (seconds->year-work-week/day-time sec)
- (time->string
- (seconds->local-time sec) "%Yww%V.%w %H:%M"))
-
-(define (seconds->year-week/day-time sec)
- (time->string
- (seconds->local-time sec) "%Yw%V.%w %H:%M"))
-
-(define (seconds->quarter sec)
- (case (string->number
- (time->string
- (seconds->local-time sec)
- "%m"))
- ((1 2 3) 1)
- ((4 5 6) 2)
- ((7 8 9) 3)
- ((10 11 12) 4)
- (else #f)))
-
-;;======================================================================
-;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
-;;
-(define (common:date-time->seconds datetime)
- (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))
-
-;;======================================================================
-;; given span of seconds tstart to tend
-;; find start time to mark and mark delta
-;;
-(define (common:find-start-mark-and-mark-delta tstart tend)
- (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
- (result #f)
- (min 60)
- (hr (* 60 60))
- (day (* 24 hr))
- (yr (* 365 day)) ;; year
- (mo (/ yr 12))
- (wk (* day 7)))
- (for-each
- (lambda (max-blks)
- (for-each
- (lambda (span) ;; 5 2 1
- (if (not result)
- (for-each
- (lambda (timeunit timesym) ;; year month day hr min sec
- (if (not result)
- (let* ((time-blk (* span timeunit))
- (num-blks (quotient deltat time-blk)))
- (if (and (> num-blks 4)(< num-blks max-blks))
- (let ((first (* (quotient tstart time-blk) time-blk)))
- (set! result (list span timeunit time-blk first timesym))
- )))))
- (list yr mo wk day hr min 1)
- '( y mo w d h m s))))
- (list 8 6 5 2 1)))
- '(5 10 15 20 30 40 50 500))
- (if values
- (apply values result)
- (values 0 day 1 0 'd))))
-
-;;======================================================================
-;; given x y lim return the cron expansion
-;;
-(define (common:expand-cron-slash x y lim)
- (let loop ((curr x)
- (res `()))
- (if (< curr lim)
- (loop (+ curr y) (cons curr res))
- (reverse res))))
-
-;;======================================================================
-;; expand a complex cron string to a list of cron strings
-;;
-;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c
-;;
-;; NOTE: with flatten a lot of the crud below can be factored down.
-;;
-(define (common:cron-expand cron-str)
- (if (list? cron-str)
- (flatten
- (fold (lambda (x res)
- (if (list? x)
- (let ((newres (map common:cron-expand x)))
- (append x newres))
- (cons x res)))
- '()
- cron-str)) ;; (map common:cron-expand cron-str))
- (let ((cron-items (string-split cron-str))
- (slash-rx (regexp "(\\d+)/(\\d+)"))
- (comma-rx (regexp ".*,.*"))
- (max-vals '((min . 60)
- (hour . 24)
- (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
- (month . 12)
- (dayofweek . 7))))
- (if (< (length cron-items) 5) ;; bad spec
- cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it
- (let loop ((hed (car cron-items))
- (tal (cdr cron-items))
- (type 'min)
- (type-tal '(hour dayofmonth month dayofweek))
- (res '()))
- (regex-case
- hed
- (slash-rx ( _ base incr ) (let* ((basen (string->number base))
- (incrn (string->number incr))
- (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
- (new-list-crons (fold (lambda (x myres)
- (cons (conc (if (null? res)
- ""
- (conc (string-intersperse res " ") " "))
- x " " (string-intersperse tal " "))
- myres))
- '() expanded-vals)))
- ;; (print "new-list-crons: " new-list-crons)
- ;; (fold (lambda (x res)
- ;; (if (list? x)
- ;; (let ((newres (map common:cron-expand x)))
- ;; (append x newres))
- ;; (cons x res)))
- ;; '()
- (flatten (map common:cron-expand new-list-crons))))
- ;; (map common:cron-expand (map common:cron-expand new-list-crons))))
- (else (if (null? tal)
- cron-str
- (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))
-
-;;======================================================================
-;; given a cron string and the last time event was processed return #t to run or #f to not run
-;;
-;; min hour dayofmonth month dayofweek
-;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7
-;;
-;; #t => yes, run the job
-;; #f => no, do not run the job
-;;
-(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
- (let* ((cron-items (map string->number (string-split cron-str)))
- (now-seconds (or now-seconds-in (current-seconds)))
- (now-time (seconds->local-time now-seconds))
- (last-done-time (seconds->local-time last-done))
- (all-times (make-hash-table)))
- ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
- (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
- #f
- (match-let ((( cmin chour cdayofmonth cmonth cdayofweek)
- cron-items)
- ;; 0 1 2 3 4 5 6
- ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
- (vector->list now-time))
- ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
- (vector->list last-done-time)))
- ;; create all possible time slots
- ;; remove invalid slots due to (for example) day of week
- ;; get the start and end entries for the ref-seconds (current) time
- ;; if last-done > ref-seconds => this is an ERROR!
- ;; does the last-done time fall in the legit region?
- ;; yes => #f do not run again this command
- ;; no => #t ok to run the command
- (for-each ;; month
- (lambda (month)
- (for-each ;; dayofmonth
- (lambda (dom)
- (for-each
- (lambda (hr) ;; hour
- (for-each
- (lambda (minute) ;; minute
- (let ((copy-now (apply vector (vector->list now-time))))
- (vector-set! copy-now 0 0) ;; force seconds to zero
- (vector-set! copy-now 1 minute)
- (vector-set! copy-now 2 hr)
- (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced
- (vector-set! copy-now 4 month)
- (let* ((copy-now-secs (local-time->seconds copy-now))
- (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector
- (if (or (not cdayofweek)
- (equal? (vector-ref new-copy 6)
- cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
- (if (or (not cdayofmonth)
- (equal? (vector-ref new-copy 3)
- (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
- (hash-table-set! all-times copy-now-secs new-copy))))))
- (if cmin
- `(,cmin) ;; if given cmin, have to use it
- (list (- nmin 1) nmin (+ nmin 1))))) ;; minute
- (if chour
- `(,chour)
- (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
- (if cdayofmonth
- `(,cdayofmonth)
- (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
- (if cmonth
- `(,cmonth)
- (list (- nmonth 1) nmonth (+ nmonth 1))))
- (let ((before #f)
- (is-in #f))
- (for-each
- (lambda (moment)
- (if (and before
- (<= before now-seconds)
- (>= moment now-seconds))
- (begin
- ;; (print)
- ;; (print "Before: " (time->string (seconds->local-time before)))
- ;; (print "Now: " (time->string (seconds->local-time now-seconds)))
- ;; (print "After: " (time->string (seconds->local-time moment)))
- ;; (print "Last: " (time->string (seconds->local-time last-done)))
- (if (< last-done before)
- (set! is-in before))
- ))
- (set! before moment))
- (sort (hash-table-keys all-times) <))
- is-in)))))
-
-(define (common:extended-cron cron-str now-seconds-in last-done)
- (let ((expanded-cron (common:cron-expand cron-str)))
- (if (string? expanded-cron)
- (common:cron-event expanded-cron now-seconds-in last-done)
- (let loop ((hed (car expanded-cron))
- (tal (cdr expanded-cron)))
- (if (common:cron-event hed now-seconds-in last-done)
- #t
- (if (null? tal)
- #f
- (loop (car tal)(cdr tal))))))))
-
;;======================================================================
;; C O L O R S
;;======================================================================
(define (common:name->iup-color name)
Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -17,21 +17,39 @@
;; along with Megatest. If not, see .
;;======================================================================
(declare (unit commonmod))
+;; (declare (uses debugprint))
(use srfi-69)
(module commonmod
*
-(import scheme chicken data-structures extras files)
-(import (prefix sqlite3 sqlite3:)
- posix typed-records srfi-18 srfi-69
- md5 message-digest
- regex srfi-1)
+(import scheme
+ chicken
+
+ (prefix sqlite3 sqlite3:)
+ data-structures
+ extras
+ files
+ matchable
+ md5
+ message-digest
+ pathname-expand
+ posix
+ posix-extras
+ regex
+ regex-case
+ srfi-1
+ srfi-18
+ srfi-69
+ typed-records
+
+ ;; debugprint
+ )
;;======================================================================
;; CONTENTS
;;
;; config file utils
@@ -136,10 +154,54 @@
;;======================================================================
;; misc conversion, data manipulation functions
;;======================================================================
+;;======================================================================
+;; return first command that exists, else #f
+;;
+(define (common:which cmds)
+ (if (null? cmds)
+ #f
+ (let loop ((hed (car cmds))
+ (tal (cdr cmds)))
+ (let ((res (with-input-from-pipe (conc "which " hed) read-line)))
+ (if (and (string? res)
+ (file-exists? res))
+ res
+ (if (null? tal)
+ #f
+ (loop (car tal)(cdr tal))))))))
+
+(define (common:get-megatest-exe)
+ (let* ((mtexe (or (get-environment-variable "MT_MEGATEST")
+ (common:which '("megatest"))
+ "megatest")))
+ (if (file-exists? mtexe)
+ (realpath mtexe)
+ mtexe)))
+
+(define (common:get-megatest-exe-dir)
+ (let* ((mtexe (common:get-megatest-exe)))
+ (pathname-directory mtexe)))
+
+;; more generic and comprehensive version of get-megatest-exe
+;;
+(define (common:get-mtexe)
+ (let* ((mtpathdir (common:get-megatest-exe-dir)))
+ (or (common:get-megatest-exe)
+ (if mtpathdir
+ (conc mtpathdir"/megatest")
+ #f)
+ "megatest")))
+
+(define (common:get-megatest-exe-path)
+ (let* ((mtpathdir (common:get-megatest-exe-dir)))
+ (conc mtpathdir":"(get-environment-variable "PATH") ":.")))
+
+(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) ))
+
;; if it looks like a number -> convert it to a number, else return it
;;
(define (lazy-convert inval)
(let* ((as-num (if (string? inval)(string->number inval) #f)))
(or as-num inval)))
@@ -161,10 +223,40 @@
(filter (lambda (x)
(not (string-match "^\\s*" x)))
val-list))
'())))
+(define (get-cpu-load)
+ (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines)))
+ (map string->number (string-split load-info))))
+
+(define *current-host-cores* #f)
+
+(define (get-current-host-cores)
+ (or *current-host-cores*
+ (let ((cpu-info (with-input-from-file "/proc/cpuinfo" read-lines)))
+ (let loop ((lines cpu-info))
+ (if (null? lines)
+ 1 ;; gotta be at least one!
+ (let* ((inl (car lines))
+ (tail (cdr lines))
+ (parts (string-split inl)))
+ (match parts
+ (("cpu" "cores" ":" num) (string->number num))
+ (else (loop tail)))))))))
+
+(define (number-of-processes-running processname)
+ (with-input-from-pipe
+ (conc "ps -def | egrep \""processname"\" |wc -l")
+ (lambda ()
+ (string->number (read-line)))))
+
+;; get the normalized (i.e. load / numcpus) for *this* host
+;;
+(define (get-normalized-cpu-load)
+ (/ (get-cpu-load)(get-current-host-cores)))
+
;;======================================================================
;; testsuite and area utilites
;;======================================================================
(define (get-testsuite-name toppath configdat)
@@ -208,18 +300,344 @@
(let ((adat (get-section cfgdat "areas")))
(map (lambda (entry)
`(,(car entry) .
,(val->alist (cadr entry))))
adat)))
+
+;;======================================================================
+;; time utils
+;;======================================================================
+
+(define (common:human-time)
+ (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
+
+;;======================================================================
+;; T I M E A N D D A T E
+;;======================================================================
+
+;;======================================================================
+;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
+(define (common:hms-string->seconds tstr)
+ (let ((parts (string-split-fields "\\w+" tstr))
+ (time-secs 0)
+ ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
+ (trx (regexp "(\\d+)([smhdMyw])")))
+ (for-each (lambda (part)
+ (let ((match (string-match trx part)))
+ (if match
+ (let ((val (string->number (cadr match)))
+ (unt (caddr match)))
+ (if val
+ (set! time-secs (+ time-secs (* val
+ (case (string->symbol unt)
+ ((s) 1)
+ ((m) 60) ;; minutes
+ ((h) 3600)
+ ((d) 86400)
+ ((w) 604800)
+ ((M) 2628000) ;; aproximately one month
+ ((y) 31536000)
+ (else #f))))))))))
+ parts)
+ time-secs))
+
+(define (seconds->hr-min-sec secs)
+ (let* ((hrs (quotient secs 3600))
+ (min (quotient (- secs (* hrs 3600)) 60))
+ (sec (- secs (* hrs 3600)(* min 60))))
+ (conc (if (> hrs 0)(conc hrs "hr ") "")
+ (if (> min 0)(conc min "m ") "")
+ sec "s")))
+
+(define (seconds->time-string sec)
+ (time->string
+ (seconds->local-time sec) "%H:%M:%S"))
+
+(define (seconds->work-week/day-time sec)
+ (time->string
+ (seconds->local-time sec) "ww%V.%u %H:%M"))
+
+(define (seconds->work-week/day sec)
+ (time->string
+ (seconds->local-time sec) "ww%V.%u"))
+
+(define (seconds->year-work-week/day sec)
+ (time->string
+ (seconds->local-time sec) "%yww%V.%w"))
+
+(define (seconds->year-work-week/day-time sec)
+ (time->string
+ (seconds->local-time sec) "%Yww%V.%w %H:%M"))
+
+(define (seconds->year-week/day-time sec)
+ (time->string
+ (seconds->local-time sec) "%Yw%V.%w %H:%M"))
+
+(define (seconds->quarter sec)
+ (case (string->number
+ (time->string
+ (seconds->local-time sec)
+ "%m"))
+ ((1 2 3) 1)
+ ((4 5 6) 2)
+ ((7 8 9) 3)
+ ((10 11 12) 4)
+ (else #f)))
+
+;;======================================================================
+;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
+;;
+(define (common:date-time->seconds datetime)
+ (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))
+
+;;======================================================================
+;; given span of seconds tstart to tend
+;; find start time to mark and mark delta
+;;
+(define (common:find-start-mark-and-mark-delta tstart tend)
+ (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
+ (result #f)
+ (min 60)
+ (hr (* 60 60))
+ (day (* 24 hr))
+ (yr (* 365 day)) ;; year
+ (mo (/ yr 12))
+ (wk (* day 7)))
+ (for-each
+ (lambda (max-blks)
+ (for-each
+ (lambda (span) ;; 5 2 1
+ (if (not result)
+ (for-each
+ (lambda (timeunit timesym) ;; year month day hr min sec
+ (if (not result)
+ (let* ((time-blk (* span timeunit))
+ (num-blks (quotient deltat time-blk)))
+ (if (and (> num-blks 4)(< num-blks max-blks))
+ (let ((first (* (quotient tstart time-blk) time-blk)))
+ (set! result (list span timeunit time-blk first timesym))
+ )))))
+ (list yr mo wk day hr min 1)
+ '( y mo w d h m s))))
+ (list 8 6 5 2 1)))
+ '(5 10 15 20 30 40 50 500))
+ (if values
+ (apply values result)
+ (values 0 day 1 0 'd))))
+
+;;======================================================================
+;; given x y lim return the cron expansion
+;;
+(define (common:expand-cron-slash x y lim)
+ (let loop ((curr x)
+ (res `()))
+ (if (< curr lim)
+ (loop (+ curr y) (cons curr res))
+ (reverse res))))
+
+;;======================================================================
+;; expand a complex cron string to a list of cron strings
+;;
+;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c
+;;
+;; NOTE: with flatten a lot of the crud below can be factored down.
+;;
+(define (common:cron-expand cron-str)
+ (if (list? cron-str)
+ (flatten
+ (fold (lambda (x res)
+ (if (list? x)
+ (let ((newres (map common:cron-expand x)))
+ (append x newres))
+ (cons x res)))
+ '()
+ cron-str)) ;; (map common:cron-expand cron-str))
+ (let ((cron-items (string-split cron-str))
+ (slash-rx (regexp "(\\d+)/(\\d+)"))
+ (comma-rx (regexp ".*,.*"))
+ (max-vals '((min . 60)
+ (hour . 24)
+ (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
+ (month . 12)
+ (dayofweek . 7))))
+ (if (< (length cron-items) 5) ;; bad spec
+ cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it
+ (let loop ((hed (car cron-items))
+ (tal (cdr cron-items))
+ (type 'min)
+ (type-tal '(hour dayofmonth month dayofweek))
+ (res '()))
+ (regex-case
+ hed
+ (slash-rx ( _ base incr ) (let* ((basen (string->number base))
+ (incrn (string->number incr))
+ (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
+ (new-list-crons (fold (lambda (x myres)
+ (cons (conc (if (null? res)
+ ""
+ (conc (string-intersperse res " ") " "))
+ x " " (string-intersperse tal " "))
+ myres))
+ '() expanded-vals)))
+ ;; (print "new-list-crons: " new-list-crons)
+ ;; (fold (lambda (x res)
+ ;; (if (list? x)
+ ;; (let ((newres (map common:cron-expand x)))
+ ;; (append x newres))
+ ;; (cons x res)))
+ ;; '()
+ (flatten (map common:cron-expand new-list-crons))))
+ ;; (map common:cron-expand (map common:cron-expand new-list-crons))))
+ (else (if (null? tal)
+ cron-str
+ (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))
+
+;;======================================================================
+;; given a cron string and the last time event was processed return #t to run or #f to not run
+;;
+;; min hour dayofmonth month dayofweek
+;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7
+;;
+;; #t => yes, run the job
+;; #f => no, do not run the job
+;;
+(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
+ (let* ((cron-items (map string->number (string-split cron-str)))
+ (now-seconds (or now-seconds-in (current-seconds)))
+ (now-time (seconds->local-time now-seconds))
+ (last-done-time (seconds->local-time last-done))
+ (all-times (make-hash-table)))
+ ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
+ (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
+ #f
+ (match-let ((( cmin chour cdayofmonth cmonth cdayofweek)
+ cron-items)
+ ;; 0 1 2 3 4 5 6
+ ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
+ (vector->list now-time))
+ ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
+ (vector->list last-done-time)))
+ ;; create all possible time slots
+ ;; remove invalid slots due to (for example) day of week
+ ;; get the start and end entries for the ref-seconds (current) time
+ ;; if last-done > ref-seconds => this is an ERROR!
+ ;; does the last-done time fall in the legit region?
+ ;; yes => #f do not run again this command
+ ;; no => #t ok to run the command
+ (for-each ;; month
+ (lambda (month)
+ (for-each ;; dayofmonth
+ (lambda (dom)
+ (for-each
+ (lambda (hr) ;; hour
+ (for-each
+ (lambda (minute) ;; minute
+ (let ((copy-now (apply vector (vector->list now-time))))
+ (vector-set! copy-now 0 0) ;; force seconds to zero
+ (vector-set! copy-now 1 minute)
+ (vector-set! copy-now 2 hr)
+ (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced
+ (vector-set! copy-now 4 month)
+ (let* ((copy-now-secs (local-time->seconds copy-now))
+ (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector
+ (if (or (not cdayofweek)
+ (equal? (vector-ref new-copy 6)
+ cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
+ (if (or (not cdayofmonth)
+ (equal? (vector-ref new-copy 3)
+ (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
+ (hash-table-set! all-times copy-now-secs new-copy))))))
+ (if cmin
+ `(,cmin) ;; if given cmin, have to use it
+ (list (- nmin 1) nmin (+ nmin 1))))) ;; minute
+ (if chour
+ `(,chour)
+ (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
+ (if cdayofmonth
+ `(,cdayofmonth)
+ (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
+ (if cmonth
+ `(,cmonth)
+ (list (- nmonth 1) nmonth (+ nmonth 1))))
+ (let ((before #f)
+ (is-in #f))
+ (for-each
+ (lambda (moment)
+ (if (and before
+ (<= before now-seconds)
+ (>= moment now-seconds))
+ (begin
+ ;; (print)
+ ;; (print "Before: " (time->string (seconds->local-time before)))
+ ;; (print "Now: " (time->string (seconds->local-time now-seconds)))
+ ;; (print "After: " (time->string (seconds->local-time moment)))
+ ;; (print "Last: " (time->string (seconds->local-time last-done)))
+ (if (< last-done before)
+ (set! is-in before))
+ ))
+ (set! before moment))
+ (sort (hash-table-keys all-times) <))
+ is-in)))))
+
+(define (common:extended-cron cron-str now-seconds-in last-done)
+ (let ((expanded-cron (common:cron-expand cron-str)))
+ (if (string? expanded-cron)
+ (common:cron-event expanded-cron now-seconds-in last-done)
+ (let loop ((hed (car expanded-cron))
+ (tal (cdr expanded-cron)))
+ (if (common:cron-event hed now-seconds-in last-done)
+ #t
+ (if (null? tal)
+ #f
+ (loop (car tal)(cdr tal))))))))
+
+
;;======================================================================
;; misc stuff
;;======================================================================
-;; (define (debug:print . params) #f)
-;; (define (debug:print-info . params) #f)
-;;
-;; (define (set-functions dbgp dbgpinfo)
-;; (set! debug:print dbgp)
-;; (set! debug:print-info dbgpinfo))
+(define (common:get-signature str)
+ (message-digest-string (md5-primitive) str))
+
+;;======================================================================
+;; hash of hashs
+;;======================================================================
+
+(define (db:hoh-set! dat key1 key2 val)
+ (let* ((subhash (hash-table-ref/default dat key1 #f)))
+ (if subhash
+ (hash-table-set! subhash key2 val)
+ (begin
+ (hash-table-set! dat key1 (make-hash-table))
+ (db:hoh-set! dat key1 key2 val)))))
+
+(define (db:hoh-get dat key1 key2)
+ (let* ((subhash (hash-table-ref/default dat key1 #f)))
+ (and subhash
+ (hash-table-ref/default subhash key2 #f))))
+
+;;======================================================================
+;; when called from a wrapper I need sometimes to find the calling
+;; wrapper, this is for dashboard to find the correct megatest.
+;;
+(define (common:find-local-megatest #!optional (progname "megatest"))
+ (let ((res (filter file-exists?
+ (map (lambda (updir)
+ (let* ((lm (car (argv)))
+ (dir (pathname-directory lm))
+ (exe (pathname-strip-directory lm)))
+ (conc (if dir (conc dir "/") "")
+ (case (string->symbol exe)
+ ((dboard) (conc updir progname))
+ ((mtest) (conc updir progname))
+ ((dashboard) progname)
+ (else exe)))))
+ '("../../" "../")))))
+ (if (null? res)
+ (begin
+ ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
+ progname)
+ (car res))))
)
Index: configf.scm
==================================================================
--- configf.scm
+++ configf.scm
@@ -25,10 +25,13 @@
(use regex regex-case matchable) ;; directory-utils)
(declare (unit configf))
(declare (uses process))
(declare (uses env))
(declare (uses keys))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
+(import commonmod)
(include "common_records.scm")
;; return list (path fullpath configname)
(define (find-config configname #!key (toppath #f))
@@ -97,10 +100,12 @@
(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)"))
(define (configf:system ht cmd)
(system cmd)
)
+
+(define configf:imports "(import commonmod)")
(define (configf:process-line l ht allow-system #!key (linenum #f))
(let loop ((res l))
(if (string? res)
(let ((matchdat (string-search configf:var-expand-regex res)))
@@ -111,11 +116,11 @@
(poststr (list-ref matchdat 4))
(result #f)
(start-time (current-seconds))
(cmdsym (string->symbol cmdtype))
(fullcmd (case cmdsym
- ((scheme scm) (conc "(lambda (ht)" cmd ")"))
+ ((scheme scm) (conc "(lambda (ht)(begin " configf:imports cmd "))"))
((system) (conc "(lambda (ht)(configf:system ht \"" cmd "\"))"))
((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))"))
((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))"))
((getenv gv) (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))"))
((mtrah) (conc "(lambda (ht)"
Index: dashboard-context-menu.scm
==================================================================
--- dashboard-context-menu.scm
+++ dashboard-context-menu.scm
@@ -32,10 +32,11 @@
(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))
(declare (unit dashboard-context-menu))
(declare (uses common))
+(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
@@ -43,10 +44,12 @@
(declare (uses subrun))
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
+
+(import commonmod)
(define (dboard:launch-testpanel run-id test-id)
(let* ((dboardexe (common:find-local-megatest "dashboard"))
(cmd (conc dboardexe
" -test " run-id "," test-id
Index: dashboard-guimonitor.scm
==================================================================
--- dashboard-guimonitor.scm
+++ dashboard-guimonitor.scm
@@ -34,10 +34,12 @@
(declare (unit dashboard-guimonitor))
(declare (uses common))
(declare (uses keys))
(declare (uses db))
(declare (uses tasks))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")
Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -31,17 +31,20 @@
(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))
(declare (unit dashboard-tests))
(declare (uses common))
+(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))
+
+(import commonmod)
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
@@ -459,12 +462,11 @@
;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
(let* ((db-path (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
- (dbstruct #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path: (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree")
- ;; local: #t))
+ (dbstruct #f) ;; NOT USED
(testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
(db-mod-time 0) ;; (file-modification-time db-path))
(last-update 0) ;; (current-seconds))
(request-update #t))
(if (not testdat)
ADDED dashboard-transport-mode.scm.template
Index: dashboard-transport-mode.scm.template
==================================================================
--- /dev/null
+++ dashboard-transport-mode.scm.template
@@ -0,0 +1,3 @@
+;; 'http or 'tcp
+(rmt:transport-mode 'nfs)
+;; (rmt:transport-mode 'http)
Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -44,20 +44,31 @@
(declare (uses dcommon))
(declare (uses dashboard-context-menu))
(declare (uses vg))
(declare (uses subrun))
(declare (uses mt))
-(declare (uses dbfile))
+(declare (uses dbmod))
+;; (declare (uses dbmemmod))
+(declare (uses dbfile))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
+(import commonmod)
+
+(import dbmod dbfile)
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "vg_records.scm")
+;; set some parameters here - these need to be put in something that can be loaded from other
+;; executables such as dashboard and mtutil
+;;
+(include "dashboard-transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)
(define help (conc
"Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version
" license GPL, Copyright (C) Matt Welland 2012-2017
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -22,10 +22,22 @@
;; Database access
;;======================================================================
;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc
+(declare (unit db))
+(declare (uses common))
+(declare (uses debugprint))
+(declare (uses dbmod))
+(declare (uses dbfile))
+(declare (uses keys))
+(declare (uses ods))
+(declare (uses client))
+(declare (uses mt))
+(declare (uses commonmod))
+(import commonmod)
+
(use (srfi 18)
extras
tcp
stack
(prefix sqlite3 sqlite3:)
@@ -44,28 +56,19 @@
z3
typed-records
matchable
files)
-(declare (unit db))
-(declare (uses common))
-(declare (uses dbmod))
-;; (declare (uses debugprint))
-(declare (uses dbfile))
-(declare (uses keys))
-(declare (uses ods))
-(declare (uses client))
-(declare (uses mt))
-
(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")
(define *number-of-writes* 0)
(define *number-non-write-queries* 0)
+(import debugprint)
(import dbmod)
(import dbfile)
;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
@@ -74,10 +77,17 @@
(state #f)
(status #f)
(count 0))
+(define (db:with-db dbstruct run-id r/w proc . params)
+ (case (rmt:transport-mode)
+ ((http)(dbfile:with-db dbstruct run-id r/w proc params))
+ ((tcp) (dbmod:with-db dbstruct run-id r/w proc params))
+ ((nfs) (dbmod:with-db dbstruct run-id r/w proc params))
+ (else (assert #f "FATAL: db:with-db called with non-existant transport mode"))))
+
;;======================================================================
;; hash of hashs
;;======================================================================
@@ -92,19 +102,10 @@
(define (db:hoh-get dat key1 key2)
(let* ((subhash (hash-table-ref/default dat key1 #f)))
(and subhash
(hash-table-ref/default subhash key2 #f))))
-(define (db:get-cache-stmth dbdat run-id db stmt)
- (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id))
- (stmt-cache (dbr:dbdat-stmt-cache dbdat))
- (stmth (db:hoh-get stmt-cache db stmt)))
- (or stmth
- (let* ((newstmth (sqlite3:prepare db stmt)))
- (db:hoh-set! stmt-cache db stmt newstmth)
- newstmth))))
-
;;======================================================================
;; SQLITE3 HELPERS
;;======================================================================
(define (db:general-sqlite-error-dump exn stmt . params)
@@ -133,10 +134,83 @@
(assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
(let* ((tmpdir (common:get-db-tmp-area)))
(if (not *dbstruct-dbs*)
(dbfile:setup do-sync *toppath* tmpdir)
*dbstruct-dbs*)))
+
+;; moved from dbfile
+;;
+;; ADD run-id SUPPORT
+;;
+(define (db:create-all-triggers dbstruct)
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (db:create-triggers db))))
+
+(define (db:create-triggers db)
+ (for-each (lambda (key)
+ (sqlite3:execute db (cadr key)))
+ db:trigger-list))
+
+(define (db:drop-all-triggers dbstruct)
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (db:drop-triggers db))))
+
+(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
+ (let* ((incompleted '())
+ (oldlaunched '())
+ (toplevels '())
+ ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
+ (deadtime (or ovr-deadtime 72000))) ;; twenty hours
+ (db:with-db
+ dbstruct run-id #f
+ (lambda (dbdat db)
+
+ ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
+ ;;
+ ;; HOWEVER: this code in run:test seems to work fine
+ ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+ ;; (db:test-get-run_duration testdat)))
+ ;; 600)
+ ;; (db:delay-if-busy dbdat)
+ (sqlite3:for-each-row
+ (lambda (test-id run-dir uname testname item-path)
+ (if (and (equal? uname "n/a")
+ (equal? item-path "")) ;; this is a toplevel test
+ ;; what to do with toplevel? call rollup?
+ (begin
+ (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
+ ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
+ (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
+ (db:get-cache-stmth dbdat db
+ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
+ run-id deadtime)
+
+ ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
+ ;;
+ ;; (db:delay-if-busy dbdat)
+ (sqlite3:for-each-row
+ (lambda (test-id run-dir uname testname item-path)
+ (if (and (equal? uname "n/a")
+ (equal? item-path "")) ;; this is a toplevel test
+ ;; what to do with toplevel? call rollup?
+ (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+ (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
+ (db:get-cache-stmth dbdat db
+ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
+ run-id)
+
+ ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
+ (if (and (null? incompleted)
+ (null? oldlaunched)
+ (null? toplevels))
+ #f
+ #t)))))
+
;; looks up subdb and returns it, if not found then set up
;; and then return it.
;;
#;(define (db:get-db dbstruct run-id)
@@ -359,13 +433,15 @@
(define (db:cache-for-read-only source target #!key (use-last-update #f))
(if (and (hash-table-ref/default *global-db-store* target #f)
(>= (file-modification-time target)(file-modification-time source)))
(hash-table-ref *global-db-store* target)
(let* ((toppath (launch:setup))
- (targ-db-last-mod (if (common:file-exists? target)
- (file-modification-time target)
- 0))
+ (targ-db-last-mod (db:get-sqlite3-mod-time target))
+;; (if (common:file-exists? target)
+;; BUG: This needs to include wal mode stuff .shm etc.
+;; (file-modification-time target)
+;; 0))
(cache-db (or (hash-table-ref/default *global-db-store* target #f)
(db:open-megatest-db path: target)))
(source-db (db:open-megatest-db path: source))
(curr-time (current-seconds))
(res '())
@@ -405,35 +481,54 @@
;; use-last-update: #t)))
;; (thread-start! th1)
;; (apply proc cache-db params)
;; ))))
-
-
-
+(define (db:get-sqlite3-mod-time fname)
+ (let* ((wal-file (conc fname "-wal"))
+ (shm-file (conc fname "-shm"))
+ (get-mtime (lambda (f)
+ (if (and (file-exists? f)
+ (file-read-access? f))
+ (file-modification-time f)
+ 0))))
+ (max (get-mtime fname)
+ (get-mtime wal-file)
+ (get-mtime shm-file))))
+
(define (db:all-db-sync dbstruct)
(let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
(data-synced 0) ;; count of changed records
(tmp-area (common:get-db-tmp-area))
(dbfiles (glob (conc tmp-area"/.megatest/*.db")))
(sync-durations (make-hash-table))
(no-sync-db (db:open-no-sync-db)))
(for-each
- (lambda (file)
+ (lambda (file) ;; tmp db file
(debug:print-info 3 *default-log-port* "file: " file)
- (let* ((fname (conc (pathname-file file) ".db"))
- (fulln (conc *toppath*"/.megatest/"fname))
- (time1 (if (file-exists? file)
- (file-modification-time file)
- (begin
- (debug:print-info 2 *default-log-port* "Sync - I do not see file "file)
- 1)))
- (time2 (if (file-exists? fulln)
- (file-modification-time fulln)
- (begin
- (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln)
- 0)))
+ (let* ((fname (conc (pathname-file file) ".db")) ;; fname is tmp db file
+ (wal-file (conc fname "-wal"))
+ (shm-file (conc fname "-shm"))
+ (fulln (conc *toppath*"/.megatest/"fname)) ;; fulln is nfs db name
+ (wal-time (if (file-exists? wal-file)
+ (file-modification-time wal-file)
+ 0))
+ (shm-time (if (file-exists? shm-file)
+ (file-modification-time shm-file)
+ 0))
+ (time1 (db:get-sqlite3-mod-time file))
+;; (if (file-exists? file) ;; time1 is the max itime of the tmp db, -wal and -shm files.
+;; (max (file-modification-time file) wal-time shm-time)
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "file)
+;; 1)))
+ (time2 (db:get-sqlite3-mod-time fulln))
+;; (if (file-exists? fulln) ;; time2 is nfs file time
+;; (file-modification-time fulln)
+;; (begin
+;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln)
+;; 0)))
(changed (> (- time1 time2) (+ (random 5) 1))) ;; it has been at some few seconds since last synced
(changed10 (> (- time1 time2) 10)) ;; it has been at least ten seconds since sync'd
(jfile-exists (file-exists? (conc file"-journal"))) ;; i.e. are we busy?
(do-cp (cond
((not (file-exists? fulln)) ;; shouldn't happen, but this might recover
@@ -456,13 +551,30 @@
(hash-table-set! sync-durations (conc fname".db")
(- (current-milliseconds) start-time)))
(debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date")
)))
dbfiles)
- (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)))
+ ;; WHY does the dbdat need to be added back?
+ (if dbdat (dbfile:add-dbdat dbstruct #f dbdat))
+ )
#t)
+(define (db:kill-servers)
+ (let* ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath*))
+ (for-each
+ (lambda (server)
+ (handle-exceptions
+ exn
+ (begin
+ (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn)
+ #f)
+ (match-let (((mod-time host port start-time server-id pid) server))
+ (if (and host pid)
+ (tasks:kill-server host pid)))))
+ servers)
+ (delete-file* (common:get-sync-lock-filepath))))
+
;; options:
;;
;; 'killservers - kills all servers
;; 'dejunk - removes junk records
;; 'adj-testids - move test-ids into correct ranges
@@ -473,117 +585,97 @@
;; run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
(let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc))
(data-synced 0) ;; count of changed records
- (tmp-area (common:get-db-tmp-area))
- (old2new (member 'old2new options))
- (dejunk (member 'dejunk options))
- (killservers (member 'killservers options))
- (servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
- (src-area (if old2new *toppath* tmp-area))
- (dest-area (if old2new tmp-area *toppath*))
- (dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
- (keys (db:get-keys dbstruct))
- (sync-durations (make-hash-table)))
-
-
- (if killservers
- (begin
- (for-each
- (lambda (server)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn)
- #f)
- (match-let (((mod-time host port start-time server-id pid) server))
- (if (and host pid)
- (tasks:kill-server host pid)))))
- servers)
- (delete-file* (common:get-sync-lock-filepath))
- )
- )
- (for-each
- (lambda (srcfile)
- (debug:print-info 3 *default-log-port* "file: " srcfile)
- (let* ((fname (conc (pathname-file srcfile) ".db"))
- (basename (pathname-file srcfile))
- (run-id (if (string= basename "main") #f (string->number basename)))
- (destfile (conc dest-area "/.megatest/" fname))
- (dest-directory (conc dest-area "/.megatest/"))
- (dummy (debug:print-info 2 *default-log-port* "destfile = " destfile))
- (dummy2 (debug:print-info 2 *default-log-port* "dejunk = " dejunk))
- (time1 (file-modification-time srcfile))
- (time2 (if (file-exists? destfile)
- (begin
- (debug:print-info 2 *default-log-port* "destfile " destfile " exists")
- (file-modification-time destfile)
- )
- (begin
- (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile)
- 0)))
- (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds
-
- (do-cp (cond
- ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover
- (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile)
- (system (conc "/bin/mkdir -p " dest-directory))
- (system (conc "/bin/cp " srcfile " " destfile))
- #t)
- (changed ;; (and changed
- ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed.
- #t)
- ((and changed *time-to-exit*) ;; last sync
- #t)
- (else
- #f))))
- (if (or dejunk do-cp)
- (let* (
- (start-time (current-milliseconds))
-
- (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc)))
- (mtdb (dbr:subdb-mtdbdat subdb))
- (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))
-
- )
- (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds")
-
- (if old2new
- (begin
- (if dejunk (db:clean-up run-id mtdb))
- (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f mtdb tmpdb)
- )
- (begin
- (if dejunk (db:clean-up run-id tmpdb))
- (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb)
- )
- )
- (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time)))
- (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date")
- )
- )
- )
- dbfiles
- )
- data-synced
- )
-)
+ (tmp-area (common:get-db-tmp-area))
+ (old2new (member 'old2new options))
+ (dejunk (member 'dejunk options))
+ (killservers (member 'killservers options))
+ (src-area (if old2new *toppath* tmp-area))
+ (dest-area (if old2new tmp-area *toppath*))
+ (dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
+ (keys (db:get-keys dbstruct))
+ (sync-durations (make-hash-table)))
+
+ ;; kill servers
+ (if killservers (db:kill-servers))
+
+ (if (not dbfiles)
+ (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.megatest"))
+ (for-each
+ (lambda (srcfile)
+ (debug:print-info 3 *default-log-port* "file: " srcfile)
+ (let* ((fname (conc (pathname-file srcfile) ".db"))
+ (basename (pathname-file srcfile))
+ (run-id (if (string= basename "main") #f (string->number basename)))
+ (destfile (conc dest-area "/.megatest/" fname))
+ (dest-directory (conc dest-area "/.megatest/"))
+ (time1 (file-modification-time srcfile))
+ (time2 (if (file-exists? destfile)
+ (begin
+ (debug:print-info 2 *default-log-port* "destfile " destfile " exists")
+ (file-modification-time destfile))
+ (begin
+ (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile)
+ 0)))
+ (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds
+
+ (do-cp (cond
+ ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover
+ (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile)
+ ;; TODO: Need to fix this for WAL mod. Can't just copy.
+ (system (conc "/bin/mkdir -p " dest-directory))
+ (system (conc "/bin/cp " srcfile " " destfile))
+ #t)
+ (changed ;; (and changed
+ #t)
+ ((and changed *time-to-exit*) ;; last sync
+ #t)
+ (else
+ #f))))
+ (if (or dejunk do-cp)
+ (let* ((start-time (current-milliseconds))
+ ;; subdb is misnamed - should be dbdat (I think...)
+ (subdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))
+ ;; (or (dbfile:get-subdb dbstruct run-id)
+ ;; (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc)))
+ (mtdb (dbr:subdb-mtdbdat subdb))
+ ;;
+ ;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .megatest/.db
+ ;;
+ (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc)))
+
+ (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds")
+ (if old2new
+ (begin
+ (if dejunk (db:clean-up run-id mtdb))
+ (db:sync-tables (db:sync-all-tables-list
+ dbstruct
+ (db:get-keys dbstruct))
+ #f mtdb tmpdb))
+ (begin
+ (if dejunk (db:clean-up run-id tmpdb))
+ (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb)))
+ (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time)))
+ (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date"))))
+ dbfiles))
+ data-synced))
;; Sync all changed db's
;;
(define (db:tmp->megatest.db-sync dbstruct run-id last-update)
(let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
(res '()))
(for-each
(lambda (subdb)
- (let* ((dbname (db:run-id->dbname run-id))
- (mtdb (dbr:subdb-mtdb subdb))
+ (let* ((mtdb (dbr:subdb-mtdb subdb))
(tmpdb (db:get-subdb dbstruct run-id))
(refndb (dbr:subdb-refndb subdb))
(newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
+ ;; BUG: verify this is really needed
(dbfile:add-dbdat dbstruct run-id tmpdb)
(set! res (cons newres res))))
subdbs)
res))
@@ -840,19 +932,21 @@
test_id INTEGER,
update_time TIMESTAMP,
cpuload INTEGER DEFAULT -1,
diskfree INTEGER DEFAULT -1,
diskusage INTGER DEFAULT -1,
- run_duration INTEGER DEFAULT 0);")
+ run_duration INTEGER DEFAULT 0,
+ last_update INTEGER DEFAULT (strftime('%s','now')));")
(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives (
id INTEGER PRIMARY KEY,
test_id INTEGER,
state TEXT DEFAULT 'new',
status TEXT DEFAULT 'n/a',
archive_type TEXT DEFAULT 'bup',
du INTEGER,
- archive_path TEXT);")))
+ archive_path TEXT,
+ last_update INTEGER DEFAULT (strftime('%s','now')));")))
(db:create-triggers db)
db)) ;; )
;;======================================================================
;; A R C H I V E S
@@ -886,10 +980,11 @@
"SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d
INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id
WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND
last_df > ?;")
dneeded))
+ ;; BUG: Verfify this is really needed
(dbfile:add-dbdat dbstruct #f dbdat)
blocks))
;; returns id of the record, register a disk allocated to archiving and record it's last known
;; available space
@@ -990,64 +1085,64 @@
;;======================================================================
;; M A I N T E N A N C E
;;======================================================================
-(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
- (let* ((incompleted '())
- (oldlaunched '())
- (toplevels '())
- (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
- (deadtime (if (and deadtime-str
- (string->number deadtime-str))
- (string->number deadtime-str)
- 72000))) ;; twenty hours
- (db:with-db
- dbstruct run-id #f
- (lambda (dbdat db)
- (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
-
- ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
- ;;
- ;; HOWEVER: this code in run:test seems to work fine
- ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
- ;; (db:test-get-run_duration testdat)))
- ;; 600)
- ;; (db:delay-if-busy dbdat)
- (sqlite3:for-each-row
- (lambda (test-id run-dir uname testname item-path)
- (if (and (equal? uname "n/a")
- (equal? item-path "")) ;; this is a toplevel test
- ;; what to do with toplevel? call rollup?
- (begin
- (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
- (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
- (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
- db
- "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');"
- run-id deadtime)
-
- ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
- ;;
- ;; (db:delay-if-busy dbdat)
- (sqlite3:for-each-row
- (lambda (test-id run-dir uname testname item-path)
- (if (and (equal? uname "n/a")
- (equal? item-path "")) ;; this is a toplevel test
- ;; what to do with toplevel? call rollup?
- (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
- (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
- db
- "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
- run-id)
-
- (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
- (if (and (null? incompleted)
- (null? oldlaunched)
- (null? toplevels))
- #f
- #t)))))
+;; (define (db:have-incompletes? dbstruct run-id ovr-deadtime)
+;; (let* ((incompleted '())
+;; (oldlaunched '())
+;; (toplevels '())
+;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
+;; (deadtime (if (and deadtime-str
+;; (string->number deadtime-str))
+;; (string->number deadtime-str)
+;; 72000))) ;; twenty hours
+;; (db:with-db
+;; dbstruct run-id #f
+;; (lambda (dbdat db)
+;; (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
+;;
+;; ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
+;; ;;
+;; ;; HOWEVER: this code in run:test seems to work fine
+;; ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+;; ;; (db:test-get-run_duration testdat)))
+;; ;; 600)
+;; ;; (db:delay-if-busy dbdat)
+;; (sqlite3:for-each-row
+;; (lambda (test-id run-dir uname testname item-path)
+;; (if (and (equal? uname "n/a")
+;; (equal? item-path "")) ;; this is a toplevel test
+;; ;; what to do with toplevel? call rollup?
+;; (begin
+;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+;; (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
+;; (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
+;; (db:get-cache-stmth dbdat db
+;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
+;; run-id deadtime)
+;;
+;; ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
+;; ;;
+;; ;; (db:delay-if-busy dbdat)
+;; (sqlite3:for-each-row
+;; (lambda (test-id run-dir uname testname item-path)
+;; (if (and (equal? uname "n/a")
+;; (equal? item-path "")) ;; this is a toplevel test
+;; ;; what to do with toplevel? call rollup?
+;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+;; (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
+;; (db:get-cache-stmth dbdat db
+;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
+;; run-id)
+;;
+;; (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
+;; (if (and (null? incompleted)
+;; (null? oldlaunched)
+;; (null? toplevels))
+;; #f
+;; #t)))))
(define (db:get-status-from-final-status-file run-dir)
(let ((infile (conc run-dir "/.final-status")))
;; first verify we are able to write the output file
(if (not (file-read-access? infile))
@@ -1086,21 +1181,21 @@
(db:with-db
dbstruct run-id #f
(lambda (dbdat db)
(let* ((stmth1 (db:get-cache-stmth
- dbdat run-id db
+ dbdat db
"SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests
WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
AND state IN ('RUNNING');"))
(stmth2 (db:get-cache-stmth
- dbdat run-id db
+ dbdat db
"SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests
WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
AND state IN ('REMOTEHOSTSTART');"))
(stmth3 (db:get-cache-stmth
- dbdat run-id db
+ dbdat db
"SELECT id,rundir,uname,testname,item_path FROM tests
WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400
AND state IN ('LAUNCHED');")))
;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
;;
@@ -1365,45 +1460,53 @@
;; (set! *last-global-delta-printed* *global-delta*)))
(define (db:set-var dbstruct var val)
(db:with-db dbstruct #f #t
(lambda (dbdat db)
- (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))
+ (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") var val))))
(define (db:add-var dbstruct var val)
(db:with-db dbstruct #f #t
(lambda (dbdat db)
- (sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var))))
+ (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+? WHERE var=?;") val var))))
(define (db:del-var dbstruct var)
(db:with-db dbstruct #f #t
(lambda (dbdat db)
- (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))
+ (sqlite3:execute (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var))))
;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================
-(define (db:no-sync-db db-in)
- (if db-in
- db-in
- (if *no-sync-db*
- *no-sync-db*
- (begin
- (mutex-lock! *db-access-mutex*)
- (let ((dbpath (common:get-db-tmp-area))
- (db (dbfile:open-no-sync-db dbpath)))
- (set! *no-sync-db* db)
- (mutex-unlock! *db-access-mutex*)
- db)))))
+(define (db:get-dbsync-path)
+ (case (rmt:transport-mode)
+ ((http)(common:get-db-tmp-area))
+ ((tcp) (conc *toppath*"/.megatest"))
+ ((nfs) (conc *toppath*"/.megatest"))
+ (else "/tmp/dunno-this-gonna-exist")))
+
+ ;; (define (db:no-sync-db db-in)
+ ;; (if db-in
+ ;; db-in
+ ;; (if *no-sync-db*
+ ;; *no-sync-db*
+ ;; (begin
+ ;; (mutex-lock! *db-access-mutex*)
+ ;; (let ((dbpath (db:get-dbsync-path))
+ ;; (db (dbfile:open-no-sync-db dbpath)))
+ ;; (assert (sqlite3:database? db) "FATAL: db:no-sync-db failed to open a database")
+ ;; (set! *no-sync-db* db)
+ ;; (mutex-unlock! *db-access-mutex*)
+ ;; db)))))
(define (with-no-sync-db proc)
- (let* ((db (db:no-sync-db *no-sync-db*)))
+ (let* ((db (db:open-no-sync-db)))
(proc db)))
(define (db:open-no-sync-db)
- (dbfile:open-no-sync-db (db:dbfile-path)))
+ (dbfile:open-no-sync-db (db:get-dbsync-path)))
(define (db:no-sync-close-db db stmt-cache)
(db:safely-close-sqlite3-db db stmt-cache))
@@ -1434,24 +1537,27 @@
(define (db:get-index-by-header header field)
(list-index (lambda (x)(equal? x field)) header))
;; look up values in a header/data structure
(define (db:get-value-by-header row header field)
- (if (or (null? header) (not row))
- #f
- (let loop ((hed (car header))
- (tal (cdr header))
- (n 0))
- (if (equal? hed field)
- (handle-exceptions
- exn
- (begin
- (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row="
- row " header=" header " field=" field ", exn=" exn)
- #f)
- (vector-ref row n))
- (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))
+ (let ((len (vector-length row)))
+ (if (or (null? header) (not row))
+ #f
+ (let loop ((hed (car header))
+ (tal (cdr header))
+ (n 0))
+ (if (equal? hed field)
+ (handle-exceptions
+ exn
+ (begin
+ (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row="
+ row " header=" header " field=" field ", exn=" exn)
+ #f)
+ (if (>= n len)
+ #f
+ (vector-ref row n)))
+ (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))))
;; Accessors for the header/data structure
;; get rows and header from
(define (db:get-header vec)(vector-ref vec 0))
(define (db:get-rows vec)(vector-ref vec 1))
@@ -1942,19 +2048,20 @@
(debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
(vector header
(reverse
- (db:with-db dbstruct #f #f ;; reads db, does not write to it.
- (lambda (dbdat db)
- (sqlite3:fold-row
- (lambda (res . r)
- (cons (list->vector r) res))
- '()
- db
- qry-str
- runnamepatt)))))))
+ (db:with-db
+ dbstruct #f #f ;; reads db, does not write to it.
+ (lambda (dbdat db)
+ (sqlite3:fold-row
+ (lambda (res . r)
+ (cons (list->vector r) res))
+ '()
+ db
+ qry-str
+ runnamepatt)))))))
;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;; this is inconsistent with get-runs but it makes some sense.
;;
@@ -2029,28 +2136,32 @@
(lambda (dbdat db)
(if msg
(sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id)
(sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))))
+(define (db:set-run-state-status-db dbdat db run-id state status )
+ (sqlite3:execute
+ (db:get-cache-stmth
+ dbdat db "UPDATE runs SET status=?,state=? WHERE id=?;") status state run-id))
+
(define (db:set-run-state-status dbstruct run-id state status )
(db:with-db
dbstruct #f #f
(lambda (dbdat db)
- (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id))))
-
-
-
+ (db:set-run-state-status-db dbdat db run-id state status))))
+
(define (db:get-run-status dbstruct run-id)
(let ((res "n/a"))
(db:with-db
dbstruct #f #f
(lambda (dbdat db)
(sqlite3:for-each-row
(lambda (status)
(set! res status))
- db
- "SELECT status FROM runs WHERE id=?;"
+ (db:get-cache-stmth
+ dbdat db
+ "SELECT status FROM runs WHERE id=?;" )
run-id)
res))))
(define (db:get-run-state dbstruct run-id)
(let ((res "n/a"))
@@ -2058,12 +2169,27 @@
dbstruct #f #f
(lambda (dbdat db)
(sqlite3:for-each-row
(lambda (status)
(set! res status))
- db
- "SELECT state FROM runs WHERE id=?;"
+ (db:get-cache-stmth
+ dbdat db
+ "SELECT state FROM runs WHERE id=?;" )
+ run-id)
+ res))))
+
+(define (db:get-run-state-status dbstruct run-id)
+ (let ((res (cons "n/a" "n/a")))
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (sqlite3:for-each-row
+ (lambda (state status)
+ (set! res (cons state status)))
+ (db:get-cache-stmth
+ dbdat db
+ "SELECT state,status FROM runs WHERE id=?;" )
run-id)
res))))
;;======================================================================
@@ -2306,12 +2432,12 @@
(sqlite3:for-each-row
(lambda (run-id testname item-path state status)
;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
(set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-")))
db
- "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;"
- test-id)))
+ "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;"
+ test-id run-id)))
res))
;; get a useful subset of the tests data (used in dashboard
;; use db:mintest-get-{id ,run_id,testname ...}
;;
@@ -2402,25 +2528,28 @@
;; NOTE: run-id is not used
;; ;;
(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment)
(db:with-db
dbstruct
- run-id
- #t
+ run-id #f
(lambda (dbdat db)
- (cond
- ((and newstate newstatus newcomment)
- (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
- test-id))
- ((and newstate newstatus)
- (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id))
- (else
- (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id))
- (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id))
- (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment)
- test-id))))))
- (mt:process-triggers dbstruct run-id test-id newstate newstatus))
+ (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment))))
+
+(define (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment)
+ (cond
+ ((and newstate newstatus newcomment)
+ (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
+ test-id))
+ ((and newstate newstatus)
+ (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id))
+ (else
+ (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id))
+ (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id))
+ (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment)
+ test-id))))
+ ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function
+ )
;; NEW BEHAVIOR: Count tests running in all runs!
;;
(define (db:get-count-tests-running dbstruct run-id) ;; fastmode)
(let* ((qry ;; (if fastmode
@@ -2429,11 +2558,11 @@
(db:with-db
dbstruct
run-id
#f
(lambda (dbdat db)
- (let* ((stmth (db:get-cache-stmth dbdat run-id db qry)))
+ (let* ((stmth (db:get-cache-stmth dbdat db qry)))
(sqlite3:first-result stmth))))))
;; NEW BEHAVIOR: Count tests running in only one run!
;;
(define (db:get-count-tests-actually-running dbstruct run-id)
@@ -2459,11 +2588,11 @@
(db:with-db
dbstruct
run-id
#f
(lambda (dbdat db)
- (let* ((stmth (db:get-cache-stmth dbdat run-id db qry)))
+ (let* ((stmth (db:get-cache-stmth dbdat db qry)))
(sqlite3:first-result stmth run-id))))))
;; For a given testname how many items are running? Used to determine
;; probability for regenerating html
;;
@@ -2472,11 +2601,11 @@
dbstruct
run-id
#f
(lambda (dbdat db)
(let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;")
- (stmth (db:get-cache-stmth dbdat run-id db stmt)))
+ (stmth (db:get-cache-stmth dbdat db stmt)))
(sqlite3:first-result
stmth run-id testname)))))
(define (db:get-not-completed-cnt dbstruct run-id)
(db:with-db
@@ -2672,11 +2801,11 @@
(lambda (run-id)
(let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
(db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs)))
run-ids)))
-;; Get test data using test_id, run-id is not used
+;; Get test data using test_id
;;
(define (db:get-test-info-by-id dbstruct run-id test-id)
(db:with-db
dbstruct
run-id
@@ -2685,12 +2814,28 @@
(let ((res #f))
(sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)
;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
(set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)))
- db
- (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;")
+ (db:get-cache-stmth dbdat db
+ (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;"))
+ test-id)
+ res))))
+
+;; Get test state, status using test_id
+;;
+(define (db:get-test-state-status-by-id dbstruct run-id test-id)
+ (db:with-db
+ dbstruct
+ run-id
+ #f
+ (lambda (dbdat db)
+ (let ((res (cons #f #f)))
+ (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
+ (lambda (state status)
+ (cons state status))
+ (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=?;")
test-id)
res))))
;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
@@ -2715,18 +2860,21 @@
(db:with-db
dbstruct
run-id
#f
(lambda (dbdat db)
- (let ((res #f))
- (sqlite3:for-each-row
- (lambda (a . b)
- (set! res (apply vector a b)))
- db
- (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;")
- test-name item-path run-id)
- res))))
+ (db:get-test-info-db db run-id test-name item-path))))
+
+(define (db:get-test-info-db db run-id test-name item-path)
+ (let ((res #f))
+ (sqlite3:for-each-row
+ (lambda (a . b)
+ (set! res (apply vector a b)))
+ db
+ (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;")
+ test-name item-path run-id)
+ res))
(define (db:test-get-rundir-from-test-id dbstruct run-id test-id)
(db:with-db
dbstruct
run-id
@@ -2843,11 +2991,11 @@
(db:with-db
dbstruct
run-id
#f
(lambda (dbdat db)
- (let* ((stmth (db:get-cache-stmth dbdat #f db stmt))
+ (let* ((stmth (db:get-cache-stmth dbdat db stmt))
(res (sqlite3:fold-row
(lambda (res id test-id category variable value expected tol units comment status type last-update)
(vector id test-id category variable value expected tol units comment status type last-update))
(vector #f #f #f #f #f #f #f #f #f #f #f #f)
stmth
@@ -3177,11 +3325,13 @@
test-name))
(item-path (db:test-get-item-path testdat))
(tl-testdat (db:get-test-info dbstruct run-id test-name ""))
(tl-test-id (if tl-testdat
(db:test-get-id tl-testdat)
- #f)))
+ #f))
+ (new-state-eh #f)
+ (new-status-eh #f))
(if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
(db:general-call dbstruct run-id 'set-test-start-time (list test-id)))
(mutex-lock! *db-transaction-mutex*)
(db:with-db
dbstruct run-id #f
@@ -3189,86 +3339,91 @@
(let ((tr-res
(sqlite3:with-transaction
db
(lambda ()
;; NB// Pass the db so it is part fo the transaction
- (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
+ (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status
(if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
- (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
- (state-statuses (db:roll-up-rules state-status-counts state status))
- (newstate (car state-statuses))
- (newstatus (cadr state-statuses)))
+ (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
+ (state-statuses (db:roll-up-rules state-status-counts state status))
+ (newstate (car state-statuses))
+ (newstatus (cadr state-statuses)))
+ (set! new-state-eh newstate)
+ (set! new-status-eh newstatus)
(debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: "
- (apply conc
- (map (lambda (x)
- (conc
- (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
- state-status-counts))); end debug:print
-
- (if tl-test-id
- (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
+ (apply conc
+ (map (lambda (x)
+ (conc
+ (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
+ state-status-counts))); end debug:print
+ (if tl-test-id
+ (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
))))))
(mutex-unlock! *db-transaction-mutex*)
(if (and test-id state status (equal? status "AUTO"))
(db:test-data-rollup dbstruct run-id test-id status))
+ (if new-state-eh ;; moved from db:test-set-state-status
+ (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh))
tr-res)))))
(define (db:roll-up-rules state-status-counts state status)
- (let* ((running (length (filter (lambda (x)
- (member (dbr:counts-state x) *common:running-states*))
- state-status-counts)))
- (bad-not-started (length (filter (lambda (x)
- (and (equal? (dbr:counts-state x) "NOT_STARTED")
- (not (member (dbr:counts-status x) *common:not-started-ok-statuses*))))
- state-status-counts)))
- (all-curr-states (common:special-sort ;; worst -> best (sort of)
- (delete-duplicates
- (if (and state (not (member state *common:dont-roll-up-states*)))
- (cons state (map dbr:counts-state state-status-counts))
- (map dbr:counts-state state-status-counts)))
- *common:std-states* >))
- (all-curr-statuses (common:special-sort ;; worst -> best
- (delete-duplicates
- (if (and state status (not (member state *common:dont-roll-up-states*)))
- (cons status (map dbr:counts-status state-status-counts))
- (map dbr:counts-status state-status-counts)))
- *common:std-statuses* >))
- (non-completes (filter (lambda (x)
- (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
- all-curr-states))
- (preq-fails (filter (lambda (x)
- (equal? x "PREQ_FAIL"))
- all-curr-statuses))
- (num-non-completes (length non-completes))
- (newstate (cond
- ((> running 0) "RUNNING") ;; anything running, call the situation running
- ((> (length preq-fails) 0) "NOT_STARTED")
- ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more.
- ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
- (else (car all-curr-states))))
- (newstatus (cond
- ((> (length preq-fails) 0) "PREQ_FAIL")
- ((or (> bad-not-started 0)
- (and (equal? newstate "NOT_STARTED")
- (> num-non-completes 0)))
- "STARTED")
- (else (car all-curr-statuses)))))
- (debug:print-info 2 *default-log-port*
- "\n--> probe db:set-state-status-and-roll-up-items: "
- "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
- "\n--> running: "running
- "\n--> bad-not-started: "bad-not-started
- "\n--> non-non-completes: "num-non-completes
- "\n--> non-completes: "non-completes
- "\n--> all-curr-states: "all-curr-states
- "\n--> all-curr-statuses: "all-curr-statuses
- "\n--> newstate "newstate
- "\n--> newstatus "newstatus
- "\n\n")
-
- ;; NB// Pass the db so it is part of the transaction
- (list newstate newstatus)))
+ (if (null? state-status-counts)
+ '(#f #f)
+ (let* ((running (length (filter (lambda (x)
+ (member (dbr:counts-state x) *common:running-states*))
+ state-status-counts)))
+ (bad-not-started (length (filter (lambda (x)
+ (and (equal? (dbr:counts-state x) "NOT_STARTED")
+ (not (member (dbr:counts-status x) *common:not-started-ok-statuses*))))
+ state-status-counts)))
+ (all-curr-states (common:special-sort ;; worst -> best (sort of)
+ (delete-duplicates
+ (if (and state (not (member state *common:dont-roll-up-states*)))
+ (cons state (map dbr:counts-state state-status-counts))
+ (map dbr:counts-state state-status-counts)))
+ *common:std-states* >))
+ (all-curr-statuses (common:special-sort ;; worst -> best
+ (delete-duplicates
+ (if (and state status (not (member state *common:dont-roll-up-states*)))
+ (cons status (map dbr:counts-status state-status-counts))
+ (map dbr:counts-status state-status-counts)))
+ *common:std-statuses* >))
+ (non-completes (filter (lambda (x)
+ (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
+ all-curr-states))
+ (preq-fails (filter (lambda (x)
+ (equal? x "PREQ_FAIL"))
+ all-curr-statuses))
+ (num-non-completes (length non-completes))
+ (newstate (cond
+ ((> running 0) "RUNNING") ;; anything running, call the situation running
+ ((> (length preq-fails) 0) "NOT_STARTED")
+ ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more.
+ ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
+ (else (car all-curr-states))))
+ (newstatus (cond
+ ((> (length preq-fails) 0) "PREQ_FAIL")
+ ((or (> bad-not-started 0)
+ (and (equal? newstate "NOT_STARTED")
+ (> num-non-completes 0)))
+ "STARTED")
+ (else (car all-curr-statuses)))))
+ (debug:print-info 2 *default-log-port*
+ "\n--> probe db:set-state-status-and-roll-up-items: "
+ "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
+ "\n--> running: "running
+ "\n--> bad-not-started: "bad-not-started
+ "\n--> non-non-completes: "num-non-completes
+ "\n--> non-completes: "non-completes
+ "\n--> all-curr-states: "all-curr-states
+ "\n--> all-curr-statuses: "all-curr-statuses
+ "\n--> newstate "newstate
+ "\n--> newstatus "newstatus
+ "\n\n")
+
+ ;; NB// Pass the db so it is part of the transaction
+ (list newstate newstatus))))
(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
(mutex-lock! *db-transaction-mutex*)
(db:with-db
dbstruct run-id #f
@@ -3275,56 +3430,54 @@
(lambda (dbdat db)
(let ((tr-res
(sqlite3:with-transaction
db
(lambda ()
- (let* ((state-status-counts (db:get-all-state-status-counts-for-run db run-id))
+ (let* ((state-status-counts (db:get-all-state-status-counts-for-run-db dbdat db run-id))
(state-statuses (db:roll-up-rules state-status-counts #f #f ))
(newstate (car state-statuses))
(newstatus (cadr state-statuses)))
(if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status)))
- (db:set-run-state-status db run-id newstate newstatus )))))))
+ (db:set-run-state-status-db dbdat db run-id newstate newstatus )))))))
(mutex-unlock! *db-transaction-mutex*)
tr-res))))
+
+(define (db:get-all-state-status-counts-for-run-db dbdat db run-id)
+ (sqlite3:map-row
+ (lambda (state status count)
+ (make-dbr:counts state: state status: status count: count))
+ (db:get-cache-stmth
+ dbdat db
+ "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;")
+ run-id ))
(define (db:get-all-state-status-counts-for-run dbstruct run-id)
- (let* ((test-count-recs (db:with-db
- dbstruct #f #f
- (lambda (dbdat db)
- (sqlite3:map-row
- (lambda (state status count)
- (make-dbr:counts state: state status: status count: count))
- db
- "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;"
- run-id )))))
- test-count-recs))
-
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (db:get-all-state-status-counts-for-run-db dbdat db run-id))))
;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
;;
;; NOTE: This is called within a transaction
;;
-(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in)
- (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path))
+(define (db:get-all-state-status-counts-for-test db run-id test-name item-path item-state-in item-status-in)
+ (let* ((test-info (db:get-test-info-db db run-id test-name item-path))
(item-state (or item-state-in (db:test-get-state test-info)))
(item-status (or item-status-in (db:test-get-status test-info)))
- (other-items-count-recs (db:with-db
- dbstruct run-id #f
- (lambda (dbdat db)
- (sqlite3:map-row
- (lambda (state status count)
- (make-dbr:counts state: state status: status count: count))
- db
- ;; ignore current item because we have changed its value in the current transation so this select will see the old value.
- "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;"
- run-id test-name item-path))))
-
+ (other-items-count-recs (sqlite3:map-row
+ (lambda (state status count)
+ (make-dbr:counts state: state status: status count: count))
+ db
+ ;; ignore current item because we have changed its value in the current transation so this select will see the old value.
+ "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;"
+ run-id test-name item-path))
;; add current item to tally outside of sql query
- (match-countrec-lambda (lambda (countrec)
- (and (equal? (dbr:counts-state countrec) item-state)
+ (match-countrec-lambda (lambda (countrec)
+ (and (equal? (dbr:counts-state countrec) item-state)
(equal? (dbr:counts-status countrec) item-status))))
-
+
(already-have-count-rec-list
(filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status
(updated-count-rec (if (null? already-have-count-rec-list)
(make-dbr:counts state: item-state status: item-status count: 1)
@@ -3334,11 +3487,10 @@
(nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec))))
(unrelated-rec-list
(filter nonmatch-countrec-lambda other-items-count-recs)))
-
(cons updated-count-rec unrelated-rec-list)))
;; (define (db:get-all-item-states db run-id test-name)
;; (sqlite3:map-row
;; (lambda (a) a)
@@ -4612,15 +4764,20 @@
(begin
(sqlite3:interrupt! db)
(sqlite3:finalize! db #t)
;; (vector-set! *task-db* 0 #f)
(set! *task-db* #f)))))
+ (if (and *no-sync-db*
+ (sqlite3:database? *no-sync-db*))
+ (sqlite3:finalize! *no-sync-db* #t))
(if (and (not (args:get-arg "-server"))
- *runremote*)
+ *runremote*
+ (eq? (rmt:transport-mode) 'http))
(begin
(debug:print-info 0 *default-log-port* "Closing all client connections...")
- (http-client#close-all-connections!)))
+ (http-transport:close-connections *runremote*)
+ #;(http-client#close-all-connections!)))
;; (if (and *runremote*
;; (remote-conndat *runremote*))
;; (begin
;; (http-client#close-all-connections!))) ;; for http-client
(if (not (eq? *default-log-port* (current-error-port)))
Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -37,13 +37,23 @@
stack
files
ports
commonmod
+ ;; debugprint
)
-;; (import debugprint)
+(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
+(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest
+(define dbfile:testsuite-name (make-parameter #f))
+(define dbfile:sync-method (make-parameter 'attach)) ;; 'attach or 'original
+
+;; 'original - use old condition code
+;; 'suicide-mode - create mtrah/stop-the-train with info on what went wrong
+;; else use no condition code (should be production mode)
+;;
+(define no-condition-db-with-db (make-parameter 'suicide-mode))
;;======================================================================
;; R E C O R D S
;;======================================================================
@@ -54,10 +64,19 @@
(areapath #f)
(homehost #f)
(tmppath #f)
(read-only #f)
(subdbs (make-hash-table))
+ ;;
+ ;; for the inmem approach (see dbmod.scm)
+ ;; this is one db per server
+ (inmem #f) ;; handle for the in memory copy
+ (dbfile #f) ;; path to the db file on disk
+ (ondiskdb #f) ;; handle for the on-disk file
+ (dbdat #f) ;; create a dbdat for the downstream calls such as db:with-db
+ (last-update 0)
+ (sync-proc #f)
)
;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
@@ -92,10 +111,11 @@
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access* #t)
(define *db-last-sync* 0) ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync*
+(define *db-last-access* (current-seconds))
(define (db:generic-error-printout exn . message)
(print-call-chain (current-error-port))
(apply dbfile:print-err message)
(dbfile:print-err
@@ -157,51 +177,26 @@
)
#f
)
)
-;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
-;; ;;
-;; (define (db:setup-db dbstruct areapath run-id)
-;; (let* ((dbname (db:run-id->dbname run-id))
-;; (dbstruct (hash-table-ref/default dbstructs dbname #f)))
-;; (if dbstruct
-;; dbstruct
-;; (let* ((dbstruct-new (make-dbr:dbstruct)))
-;; (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t)
-;; (hash-table-set! dbstructs dbname dbstruct-new)
-;; dbstruct-new))))
-
-;; ; Returns the dbdat for a particular dbfile inside the area
-;; ;;
-;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile)
-;; (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))
-;;
-;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
-;; (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
-;;
-;; (define (db:run-id->first-num run-id)
-;; (let* ((s (number->string run-id))
-;; (l (string-length s)))
-;; (substring s (- l 1) l)))
-
-;; 1234 => 4/1234.db
-;; #f => 0/main.db
-;; (abandoned the idea of num/db)
-;;
(define (dbfile:run-id->path apath run-id)
(conc apath"/"(dbfile:run-id->dbname run-id)))
(define (db:dbname->path apath dbname)
(conc apath"/"dbname))
+
+(define (dbfile:run-id->dbnum run-id)
+ (cond
+ ((number? run-id)
+ (modulo run-id (num-run-dbs)))
+ ((not run-id) "main") ;; 0 or main?
+ (else run-id)))
;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
(define (dbfile:run-id->dbname run-id)
- (cond
- ((number? run-id) (conc ".megatest/" (modulo run-id 100) ".db"))
- ((not run-id) (conc ".megatest/main.db"))
- (else run-id)))
+ (conc ".megatest/"(dbfile:run-id->dbnum run-id)".db"))
;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access.
;;
@@ -209,14 +204,12 @@
(cond
(*dbstruct-dbs*
(dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
*dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
(else
- (let* ((dbstruct (make-dbr:dbstruct)))
+ (let* ((dbstruct (make-dbr:dbstruct areapath: areapath tmppath: tmppath)))
(set! *dbstruct-dbs* dbstruct)
- (dbr:dbstruct-areapath-set! dbstruct areapath)
- (dbr:dbstruct-tmppath-set! dbstruct tmppath)
dbstruct))))
(define (dbfile:get-subdb dbstruct run-id)
(let* ((dbfname (dbfile:run-id->dbname run-id)))
(hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))
@@ -224,14 +217,17 @@
(define (dbfile:set-subdb dbstruct run-id subdb)
(hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb))
;; (define *dbfile:num-handles-in-use* 0)
-;; Get/open a database
+;; Get/open a database.
+;;
+;; NOTE: most usage should call dbfile:open-db to get a dbdat
+;;
;; if run-id => get run specific db
;; if #f => get main db
-;; if run-id is a string treat it as a filename
+;; if run-id is a string treat it as a filename - DON'T use this - we'll get rid of it.
;; if db already open - return inmem
;; if db not open, open inmem, rundb and sync then return inmem
;; inuse gets set automatically for rundb's
;;
(define (dbfile:get-dbdat dbstruct run-id)
@@ -241,12 +237,16 @@
(begin
(stack-pop! (dbr:subdb-dbstack subdb))))))
;; return a previously opened db handle to the stack of available handles
(define (dbfile:add-dbdat dbstruct run-id dbdat)
- (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
- (stack-push! (dbr:subdb-dbstack subdb) dbdat)
+ (let* ((subdb (dbfile:get-subdb dbstruct run-id))
+ (dbstk (dbr:subdb-dbstack subdb))
+ (count (stack-count dbstk)))
+ (if (> count 15)
+ (dbfile:print-err "WARNING: stack for "run-id".db is "count"."))
+ (stack-push! dbstk dbdat)
dbdat))
;; set up a subdb
;;
(define (dbfile:init-subdb dbstruct run-id init-proc)
@@ -326,21 +326,21 @@
(define (dbfile:print-err . params)
(with-output-to-port
(current-error-port)
(lambda ()
(apply print params))))
-
+
(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500))
- (let* ((busy-file (conc fname"-journal"))
+ (let* ((busy-file (conc fname "-journal"))
(delay-time (* (- 51 tries-left) 1.1))
(write-access (file-write-access? fname))
(dir-access (file-write-access? (pathname-directory fname)))
(retry (lambda ()
(thread-sleep! delay-time)
(if (> tries-left 0)
(dbfile:cautious-open-database fname init-proc
- sync-mode: sync-mode journal-mode: journal-mode
+ sync-mode journal-mode
(- tries-left 1))))))
(assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up."))
(if (and (file-write-access? fname)
(file-exists? busy-file))
@@ -351,11 +351,11 @@
(thread-sleep! 1)
(if (eq? tries-left 2)
(begin
(dbfile:print-err "INFO: forcing journal rollup "busy-file)
(dbfile:brute-force-salvage-db fname)))
- (dbfile:cautious-open-database fname init-proc sync-mode: sync-mode journal-mode: journal-mode (- tries-left 1)))
+ (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
(let* ((result (condition-case
(if dir-access
(dbfile:with-simple-file-lock
(conc fname ".lock")
@@ -402,29 +402,46 @@
"cp "backupfname" "fname)))
(dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
" "cmd)
(system cmd)))
+;; opens and returns handle and nothing else
+;;
+(define (dbfile:raw-open-no-sync-db dbpath)
+ (if (not (file-exists? dbpath))
+ (create-directory dbpath #t))
+ (let* ((dbname (conc dbpath "/no-sync.db"))
+ (db-exists (file-exists? dbname))
+ (init-proc (lambda (db)
+ (if (not db-exists)
+ (begin
+ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
+ )))
+ (on-tmp (equal? (car (string-split dbpath "/")) "tmp"))
+ (db (if on-tmp
+ (dbfile:cautious-open-database dbname init-proc 0 "WAL")
+ (dbfile:cautious-open-database dbname init-proc 0 #f)
+ ;; (sqlite3:open-database dbname)
+ )))
+ (if on-tmp ;; done in cautious-open-database
+ (begin
+ (sqlite3:execute db "PRAGMA synchronous = 0;")
+ (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))
+ db))
+
+(define (dbfile:with-no-sync-db dbpath proc)
+ (let* ((db (dbfile:raw-open-no-sync-db dbpath))
+ (res (proc db)))
+ (sqlite3:finalize! db)
+ res))
(define (dbfile:open-no-sync-db dbpath)
(if *no-sync-db*
*no-sync-db*
- (begin
- (if (not (file-exists? dbpath))
- (create-directory dbpath #t))
- (let* ((dbname (conc dbpath "/no-sync.db"))
- (db-exists (file-exists? dbname))
- (init-proc (lambda (db)
- (if (not db-exists)
- (begin
- (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
- )))
- (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname)))
- ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
- ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database
- (set! *no-sync-db* db)
- db))))
+ (let* ((db (dbfile:raw-open-no-sync-db dbpath)))
+ (set! *no-sync-db* db)
+ db)))
(define (db:no-sync-set db var val)
(sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
(define (db:no-sync-del! db var)
@@ -513,11 +530,12 @@
(db:sync-touched dbstruct runid keys dbinit)
(set! *db-sync-in-progress* #f)
(delete-file* lock-file)
#t)
(begin
- (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.")
+ (if (common:low-noise-print 120 (conc "no lock "from-db-file))
+ (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress."))
#f
))))
;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
;; ;;
@@ -556,11 +574,11 @@
(tmpdb (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f))
(start-t (current-seconds)))
(mutex-lock! *db-multi-sync-mutex*)
(let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
(mutex-unlock! *db-multi-sync-mutex*)
- (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
+ (db:sync-tables (db:sync-all-tables-list keys) update_info tmpdb mtdb))
(mutex-lock! *db-multi-sync-mutex*)
(set! *db-last-sync* start-t)
(set! *db-last-access* start-t)
(mutex-unlock! *db-multi-sync-mutex*)
(dbfile:add-dbdat dbstruct run-id tmpdb)
@@ -619,12 +637,12 @@
'("type" #f)
'("last_update" #f))))
;; needs db to get keys, this is for syncing all tables
;;
-(define (db:sync-main-list dbstruct keys)
- (let ((keys keys)) ;; (db:get-keys dbstruct)))
+(define (db:sync-main-list keys)
+ (let ((keys keys))
(list
(list "keys"
'("id" #f)
'("fieldname" #f)
'("fieldtype" #f))
@@ -658,14 +676,29 @@
'("reviewed" #f)
'("iterated" #f)
'("avg_runtime" #f)
'("avg_disk" #f)
'("tags" #f)
- '("jobgroup" #f)))))
+ '("jobgroup" #f))
+
+
+ (list "tasks_queue"
+ '("id" #f)
+ '("action" #f)
+ '("owner" #f)
+ '("state" #f)
+ '("target" #f)
+ '("name" #f)
+ '("testpatt" #f)
+ '("keylock" #f)
+ '("params" #f)
+ '("creation_time" #f)
+ '("execution_time" #f))
+ )))
-(define (db:sync-all-tables-list dbstruct keys)
- (append (db:sync-main-list dbstruct keys)
+(define (db:sync-all-tables-list keys)
+ (append (db:sync-main-list keys)
db:sync-tests-only))
;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are dbdat's
;;
@@ -870,11 +903,12 @@
)
)
tbls)
(let* ((runtime (- (current-milliseconds) start-time))
(should-print (or ;; (debug:debug-mode 12)
- (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
+ (common:low-noise-print 120 "db sync")
+ (> runtime 500)))) ;; low and high sync times treated as separate.
(for-each
(lambda (dat)
(let ((tblname (car dat))
(count (cdr dat)))
(set! tot-count (+ tot-count count))
@@ -915,30 +949,10 @@
FOR EACH ROW
BEGIN
UPDATE test_data SET last_update=(strftime('%s','now'))
WHERE id=old.id;
END;" )))
-;;
-;; ADD run-id SUPPORT
-;;
-(define (db:create-all-triggers dbstruct)
- (db:with-db
- dbstruct #f #f
- (lambda (dbdat db)
- (db:create-triggers db))))
-
-(define (db:create-triggers db)
- (for-each (lambda (key)
- (sqlite3:execute db (cadr key)))
- db:trigger-list))
-
-(define (db:drop-all-triggers dbstruct)
- (db:with-db
- dbstruct #f #f
- (lambda (dbdat db)
- (db:drop-triggers db))))
-
(define (db:is-trigger-dropped db tbl-name)
(let* ((trigger-name (if (equal? tbl-name "test_steps")
"update_teststeps_trigger"
(conc "update_" tbl-name "_trigger")))
(res #f))
@@ -982,20 +996,81 @@
;; call with dbinit=db:initialize-main-db
;;
(define (db:open-db dbstruct run-id dbinit)
;; (mutex-lock! *db-open-mutex*)
(let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
+ #;(case (rmt:transport-mode)
+ ((http) (dbfile:open-db dbstruct run-id dbinit))
+ ((tcp) (dbmod:open-db dbstruct run-id dbinit))
+ (else (assert #f "FATAL: rmt:transport-node not correct value"(rmt:transport-mode))))
(set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
;; (mutex-unlock! *db-open-mutex*)
dbdat))
(define dbfile:db-init-proc (make-parameter #f))
+
+;; in xmaxima this gives a curve close to what I want:
+;; plot2d ((exp(x/1.2)-1)/300, [x, 0, 10])$
+;; plot2d ((exp(x/1.5)-1)/40, [x, 0, 10])$
+;; plot2d ((exp(x/5)-1)/40, [x, 0, 20])$
+(define (dbfile:droop x)
+ (/ (- (exp (/ x 5)) 1) 40))
+ ;; (* numqrys (/ 1 (qif-slope))))
+
+;; create a dropping near the db file in a qif dir
+;; use count of such files to gate queries (queries in flight)
+;;
+(define (dbfile:wait-for-qif fname run-id params)
+ (let* ((thedir (pathname-directory fname))
+ (dbnum (dbfile:run-id->dbnum run-id))
+ (destdir (conc thedir"/qif-"dbnum))
+ (uniqn (get-area-path-signature (conc dbnum params)))
+ (crumbn (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id))))
+ (if (not (file-exists? destdir))(create-directory (conc destdir"/attic") #t))
+ (let loop ((count 0))
+ (let* ((currlks (glob (conc destdir"/*")))
+ (numqrys (length currlks))
+ (delayval (cond ;; do a droopish curve
+ ((> numqrys 25)
+ (for-each
+ (lambda (f)
+ (if (> (- (current-seconds)
+ (handle-exceptions
+ exn
+ (current-seconds) ;; file is likely gone, just fake out
+ (file-modification-time f)))
+ (keep-age-param))
+ (let* ((basedir (pathname-directory f))
+ (filen (pathname-file f))
+ (destf (conc basedir"/attic/"filen)))
+ (dbfile:print-err "Moving qif file "f" older than 10 seconds to "destf)
+ ;; (delete-file* f)
+ (handle-exceptions
+ exn
+ #t
+ (file-move f destf #t)))))
+ currlks)
+ 4)
+ ((> numqrys 0) (dbfile:droop numqrys)) ;; slope of 1/100
+ (else #f))))
+ (if (and delayval
+ (< count 5))
+ (begin
+ (thread-sleep! delayval)
+ (loop (+ count 1))))))
+ (with-output-to-file crumbn
+ (lambda ()
+ (print fname" run-id="run-id" params="params)
+ ))
+ crumbn))
;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
-(define (db:with-db dbstruct run-id r/w proc . params)
+(define (dbfile:with-db dbstruct run-id r/w proc params)
+ (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
+ (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
(let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
(have-struct (dbr:dbstruct? dbstruct))
(dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly
(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
#f))
@@ -1004,42 +1079,61 @@
dbstruct))
(fname (if dbdat
(dbr:dbdat-dbfile dbdat)
"nofilenameavailable"))
(jfile (conc fname"-journal"))
- #;(subdb (if have-struct
- (dbfile:get-subdb dbstruct run-id)
- #f))
- ) ;; was 25
+ (qryproc (lambda ()
+ (if use-mutex (mutex-lock! *db-with-db-mutex*))
+ (let ((res (apply proc dbdat db params))) ;; the actual call is here.
+ (if use-mutex (mutex-unlock! *db-with-db-mutex*))
+ ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
+ (if dbdat
+ (dbfile:add-dbdat dbstruct run-id dbdat))
+ ;; (delete-file* crumbfile)
+ res)))
+ (stop-train (conc (dbr:dbstruct-areapath dbstruct)"/stop-the-train")))
+
+ (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db
+ ", fname="fname)
(if (file-exists? jfile)
(begin
(dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
(thread-sleep! 0.2)))
(if (and use-mutex
(common:low-noise-print 120 "over-50-parallel-api-requests"))
- (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
+ (dbfile:print-err *api-process-request-count*
+ " parallel api requests being processed in process "
(current-process-id))) ;; ", throttling access"))
- (condition-case
- (begin
- (if use-mutex (mutex-lock! *db-with-db-mutex*))
- (let ((res (apply proc dbdat db params))) ;; the actual call is here.
- (if use-mutex (mutex-unlock! *db-with-db-mutex*))
- ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
- (if dbdat
- (dbfile:add-dbdat dbstruct run-id dbdat))
- res))
- (exn (io-error)
- (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
- (exn (corrupt)
- (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
- (exn (busy)
- (db:generic-error-printout exn "ERROR: database " fname
- " is locked. Try copying to another location, remove original and copy back."))
- (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
- (exn ()
- (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
- ((condition-property-accessor 'exn 'message) exn))))))
+ (case (no-condition-db-with-db)
+ ((production)(qryproc))
+ ((suicide-mode)
+ (handle-exceptions
+ exn
+ (with-output-to-file stop-train
+ (lambda ()
+ (db:generic-error-printout exn "Stop train mode, run-id: "run-id
+ " params: "params" proc: "proc)))
+ (qryproc)))
+ (else
+ (condition-case
+ (qryproc)
+ (exn (io-error)
+ (db:generic-error-printout exn "ERROR: i/o error with "fname
+ ". Check permissions, disk space etc. and try again."))
+ (exn (corrupt)
+ (db:generic-error-printout exn "ERROR: database "fname
+ " is corrupt. Repair it to proceed."))
+ (exn (busy)
+ (db:generic-error-printout exn "ERROR: database "fname
+ " is locked. Try copying to another location,"
+ " remove original and copy back."))
+ (exn (permission)(db:generic-error-printout exn "ERROR: database "fname
+ " has some permissions problem."))
+ (exn ()
+ (db:generic-error-printout exn "ERROR: Unknown error with database "fname
+ " message: "
+ ((condition-property-accessor 'exn 'message) exn))))))))
;;======================================================================
;; another attempt at a transactionized queue
;;======================================================================
@@ -1166,6 +1260,19 @@
(let ((res (proc)))
(dbfile:simple-file-release-lock fname)
res)
(assert #t "FATAL: simple file lock never got a lock."))))
+(define (db:get-cache-stmth dbdat db stmt)
+ (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id))
+ (stmt-cache (dbr:dbdat-stmt-cache dbdat))
+ ;; (stmth (db:hoh-get stmt-cache db stmt))
+ (stmth (hash-table-ref/default stmt-cache stmt #f)))
+ (or stmth
+ (let* ((newstmth (sqlite3:prepare db stmt)))
+ ;; (db:hoh-set! stmt-cache db stmt newstmth)
+ (hash-table-set! stmt-cache stmt newstmth)
+ newstmth))))
+
+
+
)
Index: dbmod.scm
==================================================================
--- dbmod.scm
+++ dbmod.scm
@@ -17,40 +17,398 @@
;; along with Megatest. If not, see .
;;======================================================================
(declare (unit dbmod))
+(declare (uses dbfile))
+(declare (uses commonmod))
+(declare (uses debugprint))
(module dbmod
*
-(import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:)
- posix typed-records srfi-18
- srfi-69)
-
-(define (db:run-id->dbname run-id)
- (cond
- ((number? run-id)(conc run-id ".db"))
- ((not run-id) "main.db")
- (else run-id)))
-
-
-;;======================================================================
-;; hash of hashs
-;;======================================================================
-
-
-(define (db:hoh-set! dat key1 key2 val)
- (let* ((subhash (hash-table-ref/default dat key1 #f)))
- (if subhash
- (hash-table-set! subhash key2 val)
- (begin
- (hash-table-set! dat key1 (make-hash-table))
- (db:hoh-set! dat key1 key2 val)))))
-
-(define (db:hoh-get dat key1 key2)
- (let* ((subhash (hash-table-ref/default dat key1 #f)))
- (and subhash
- (hash-table-ref/default subhash key2 #f))))
+(import scheme
+ chicken
+ data-structures
+ extras
+
+ (prefix sqlite3 sqlite3:)
+ posix
+ typed-records
+ srfi-1
+ srfi-18
+ srfi-69
+
+ commonmod
+ dbfile
+ debugprint
+ )
+
+;; NOTE: This returns only the name "1.db", "main.db", not the path
+;;
+(define (dbmod:run-id->dbfname run-id)
+ (conc (dbfile:run-id->dbnum run-id)".db"))
+
+(define (dbmod:get-dbdir dbstruct)
+ (let* ((areapath (dbr:dbstruct-areapath dbstruct))
+ (dbdir (conc areapath"/.megatest")))
+ (if (and (file-write-access? areapath)
+ (not (file-exists? dbdir)))
+ (create-directory dbdir))
+ dbdir))
+
+(define (dbmod:run-id->full-dbfname dbstruct run-id)
+ (conc (dbmod:get-dbdir dbstruct run-id)"/"(dbmod:run-id->dbfname run-id)))
+
+;;======================================================================
+;; Read-only inmem cached direct from disk method
+;;======================================================================
+
+(define *dbmod:nfs-db-handles* (make-hash-table)) ;; dbfname -> dbstruct
+
+(define (dbmod:nfs-get-dbstruct run-id keys init-proc areapath)
+ (assert areapath "FATAL: dbmod:nfs-get-dbstruct called without areapath set.")
+ (let* ((dbfname (dbmod:run-id->dbfname run-id))
+ (dbstruct (hash-table-ref/default *dbmod:nfs-db-handles* dbfname #f)))
+ (if dbstruct
+ (let* ((last-update (dbr:dbstruct-last-update dbstruct))
+ (curr-secs (current-seconds)))
+ (if (> (- curr-secs last-update) 2)
+ (begin
+ ((dbr:dbstruct-sync-proc dbstruct) last-update)
+ (dbr:dbstruct-last-update-set! dbstruct curr-secs)))
+ dbstruct)
+ (let* ((newdbstruct (dbmod:open-dbmoddb areapath run-id dbfname init-proc keys syncdir: 'fromdisk)))
+ (hash-table-set! *dbmod:nfs-db-handles* dbfname newdbstruct)
+ newdbstruct))))
+
+;;======================================================================
+;; The inmem one-db file per server method goes in here
+;;======================================================================
+
+(define (dbmod:with-db dbstruct run-id r/w proc params)
+ (let* ((dbdat (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
+ (dbh (dbr:dbdat-dbh dbdat))
+ (dbfile (dbr:dbdat-dbfile dbdat)))
+ (apply proc dbdat dbh params)))
+
+(define (dbmod:open-inmem-db initproc)
+ (let* ((db (sqlite3:open-database ":memory:"))
+ (handler (sqlite3:make-busy-timeout 3600)))
+ (sqlite3:set-busy-handler! db handler)
+ (initproc db)
+ db))
+
+(define (dbmod:open-db dbstruct run-id dbinit)
+ (or (dbr:dbstruct-dbdat dbstruct)
+ (let* ((dbdat (make-dbr:dbdat
+ dbfile: (dbr:dbstruct-dbfile dbstruct)
+ dbh: (dbr:dbstruct-inmem dbstruct)
+ )))
+ (dbr:dbstruct-dbdat-set! dbstruct dbdat)
+ dbdat)))
+
+;; Open the inmem db and the on-disk db
+;; populate the inmem db with data
+;;
+;; Updates fields in dbstruct
+;; Returns dbstruct
+;;
+;; * This routine creates the db if not found
+;; * Probably can get rid of the dbstruct-in
+;;
+(define (dbmod:open-dbmoddb areapath run-id dbfname-in init-proc keys
+ #!key (dbstruct-in #f)
+ (syncdir 'todisk))
+ (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
+ (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))
+ (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept
+ (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
+ (dbexists (file-exists? dbfullname))
+ (inmem (dbmod:open-inmem-db init-proc))
+ (write-access (file-write-access? dbpath))
+ (db (dbfile:with-simple-file-lock
+ (conc dbfullname".lock")
+ (lambda ()
+ (let* ((db (sqlite3:open-database dbfullname))
+ (handler (sqlite3:make-busy-timeout 136000)))
+ (sqlite3:set-busy-handler! db handler)
+ (if write-access
+ (init-proc db))
+ db))))
+ (tables (db:sync-all-tables-list keys)))
+ (dbr:dbstruct-inmem-set! dbstruct inmem)
+ (dbr:dbstruct-ondiskdb-set! dbstruct db)
+ (dbr:dbstruct-dbfile-set! dbstruct dbfullname)
+ (dbr:dbstruct-sync-proc-set! dbstruct
+ (lambda (last-update)
+ (sync-gasket tables last-update inmem db
+ dbfullname syncdir)))
+ ;; (dbmod:sync-tables tables #f db inmem)
+ (sync-gasket tables #f inmem db dbfullname 'fromdest)
+ (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
+ dbstruct))
+
+;; (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
+;; (dbmod:sync-tables tables last-update inmem db)
+;; (dbmod:sync-tables tables last-update db inmem))))
+
+;; direction: 'fromdest 'todest
+;;
+(define (sync-gasket tables last-update inmem dbh dbfname direction)
+ (case (dbfile:sync-method)
+ ((attach)
+ (dbmod:attach-sync tables inmem dbfname direction))
+ (else
+ (case direction
+ ((todest)
+ (dbmod:sync-tables tables last-update inmem dbh))
+ (else
+ (dbmod:sync-tables tables last-update dbh inmem))))))
+
+
+(define (dbmod:close-db dbstruct)
+ ;; do final sync to disk file
+ ;; (do-sync ...)
+ (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct)))
+
+;;======================================================================
+;; Sync db
+;;======================================================================
+
+(define (dbmod:calc-use-last-update has-last-update fields last-update)
+ (cond
+ ((and has-last-update
+ (member "last_update" fields))
+ #t) ;; if given a number, just use it for all fields
+ ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+ ((and (pair? last-update)
+ (member (car last-update) ;; last-update field name
+ (map car fields)))
+ #t)
+ ((and last-update (not (pair? last-update)) (not (number? last-update)))
+ (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
+ #f)
+ (else
+ #f)))
+
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;; dbs are sqlite3 db handles
+;;
+;; if last-update specified ("field-name" . time-in-seconds)
+;; then sync only records where field-name >= time-in-seconds
+;; IFF field-name exists
+;;
+;; Use (db:sync-all-tables-list keys) to get the tbls input
+;;
+(define (dbmod:sync-tables tbls last-update fromdb todb)
+ (let ((stmts (make-hash-table)) ;; table-field => stmt
+ (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 ))
+ (numrecs (make-hash-table))
+ (start-time (current-milliseconds))
+ (tot-count 0))
+ (for-each ;; table
+ (lambda (tabledat)
+ (let* ((tablename (car tabledat))
+ (fields (cdr tabledat))
+ (has-last-update (member "last_update" fields))
+ (use-last-update (dbmod:calc-use-last-update has-last-update fields last-update))
+ (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+ (if (number? last-update)
+ last-update
+ (cdr last-update))
+ #f))
+ (last-update-field (if use-last-update
+ (if (number? last-update)
+ "last_update"
+ (car last-update))
+ #f))
+ (num-fields (length fields))
+ (field->num (make-hash-table))
+ (num->field (apply vector (map car fields))) ;; BBHERE
+ (full-sel (conc "SELECT " (string-intersperse (map car fields) ",")
+ " FROM " tablename (if use-last-update ;; apply last-update criteria
+ (conc " WHERE " last-update-field " >= " last-update-value)
+ "")
+ ";"))
+ (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
+ " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
+ (fromdat '())
+ (fromdats '())
+ (totrecords 0)
+ (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
+ (todat (make-hash-table))
+ (count 0)
+ (field-names (map car fields)))
+
+ ;; set up the field->num table
+ (for-each
+ (lambda (field)
+ (hash-table-set! field->num field count)
+ (set! count (+ count 1)))
+ fields)
+
+ ;; read the source table
+ ;; store a list of all rows in the table in fromdat, up to batch-len.
+ ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
+ (sqlite3:for-each-row
+ (lambda (a . b)
+ (set! fromdat (cons (apply vector a b) fromdat))
+ (if (> (length fromdat) batch-len)
+ (begin
+ (set! fromdats (cons fromdat fromdats))
+ (set! fromdat '())
+ (set! totrecords (+ totrecords 1)))))
+ fromdb
+ full-sel)
+
+ ;; Count less than batch-len as a record
+ (if (> (length fromdat) 0)
+ (set! totrecords (+ totrecords 1)))
+
+ ;; tack on remaining records in fromdat
+ (if (not (null? fromdat))
+ (set! fromdats (cons fromdat fromdats)))
+
+ (sqlite3:for-each-row
+ (lambda (a . b)
+ (hash-table-set! todat a (apply vector a b)))
+ todb
+ full-sel)
+
+ ;; first pass implementation, just insert all changed rows
+ (let* ((db todb)
+ (drp-trigger (if (member "last_update" field-names)
+ (db:drop-trigger db tablename)
+ #f))
+ (has-last-update (member "last_update" field-names))
+ (is-trigger-dropped (if has-last-update
+ (db:is-trigger-dropped db tablename)
+ #f))
+ (stmth (sqlite3:prepare db full-ins))
+ (changed-rows 0))
+ (for-each
+ (lambda (fromdat-lst)
+ (sqlite3:with-transaction
+ db
+ (lambda ()
+ (for-each ;;
+ (lambda (fromrow)
+ (let* ((a (vector-ref fromrow 0))
+ (curr (hash-table-ref/default todat a #f))
+ (same #t))
+ (let loop ((i 0))
+ (if (or (not curr)
+ (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
+ (set! same #f))
+ (if (and same
+ (< i (- num-fields 1)))
+ (loop (+ i 1))))
+ (if (not same)
+ (begin
+ (apply sqlite3:execute stmth (vector->list fromrow))
+ (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
+ (set! changed-rows (+ changed-rows 1))))))
+ fromdat-lst))))
+ fromdats)
+
+ (sqlite3:finalize! stmth)
+ (if (member "last_update" field-names)
+ (db:create-trigger db tablename)))))
+ tbls)
+ (let* ((runtime (- (current-milliseconds) start-time))
+ (should-print (or ;; (debug:debug-mode 12)
+ (common:low-noise-print 120 "db sync")
+ (> runtime 500)))) ;; low and high sync times treated as separate.
+ (for-each
+ (lambda (dat)
+ (let ((tblname (car dat))
+ (count (cdr dat)))
+ (set! tot-count (+ tot-count count))))
+ (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
+ tot-count))
+
+(define (has-last-update dbh tablename)
+ (let* ((has-last #f))
+ (sqlite3:for-each-row
+ (lambda (name)
+ (if (equal? name "last_update")
+ (set! has-last #t)))
+ dbh
+ (conc "SELECT name FROM pragma_table_info('"tablename"') as tblInfo;"))
+ has-last))
+
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;;
+;; direction = fromdest, todest
+;; mode = 'full, 'incr
+;;
+;; Idea: youngest in dest is last_update time
+;;
+(define (dbmod:attach-sync tables dbh destdbfile direction #!key
+ (mode 'full)
+ (no-update '("keys")) ;; do
+ )
+ (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile)
+ (if (not (sqlite3:auto-committing? dbh))
+ (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.")
+ (let* ((table-names (map car tables))
+ (dest-exists (file-exists? destdbfile)))
+ (assert dest-exists "FATAL: sync called with non-existant file, "destdbfile)
+ ;; attach the destdbfile
+ ;; for each table
+ ;; insert into dest. select * from src. where last_update>last_update
+ ;; done
+ (debug:print 0 *default-log-port* "Attaching "destdbfile" as auxdb")
+ (sqlite3:execute dbh (conc "ATTACH '"destdbfile"' AS auxdb;"))
+ (for-each
+ (lambda (table)
+ (let* ((tbldat (alist-ref table tables equal?))
+ (fields (map car tbldat))
+ (fields-str (string-intersperse fields ","))
+ (dir (eq? direction 'todest))
+ (fromdb (if dir "" "auxdb."))
+ (todb (if dir "auxdb." ""))
+ (stmt1 (conc "INSERT OR IGNORE INTO "todb table
+ " SELECT * FROM "fromdb table";"))
+ (stmt2 (conc "INSERT OR REPLACE INTO "todb table
+ " SELECT * FROM "fromdb table" WHERE "
+ fromdb table".last_update > "
+ todb table".last_update;"))
+ (stmt3 (conc "INSERT OR REPLACE INTO "todb"."table
+ " SELECT * FROM "fromdb table";"))
+ (stmt4 (conc "DELETE FROM "todb table" WHERE "fromdb
+ table ".last_update > "todb table".last_update;"))
+ (stmt5 (conc "DELETE FROM "todb table";"))
+ (stmt6 (conc "INSERT OR REPLACE INTO "todb table" ("fields-str") SELECT "fields-str" FROM "fromdb table";"))
+ (start-ms (current-milliseconds)))
+ ;; (if (not (has-last-update dbh table))
+ ;; (sqlite3:execute dbh (conc "ALTER TABLE "table" ADD COLUMN last_update INTEGER;")))
+ ;; (if (not (has-last-update dbh (conc "auxdb."table)))
+ ;; (sqlite3:execute dbh (conc "ALTER TABLE auxdb."table" ADD COLUMN last_update INTEGER;")))
+ (if (sqlite3:auto-committing? dbh)
+ (begin
+ (sqlite3:with-transaction
+ dbh
+ (lambda ()
+ (sqlite3:execute dbh stmt5)
+ ;; (sqlite3:execute dbh stmt4) ;; if it worked this would be better for incremental up
+ ;; (sqlite3:execute dbh stmt1)
+ (sqlite3:execute dbh stmt6)
+ ))
+ (debug:print 0 *default-log-port* "Synced table "table
+ " in "(- (current-milliseconds) start-ms)"ms"))
+ (debug:print 0 *default-log-port* "Skipping sync of table "table" due to transaction in flight."))))
+ table-names)
+ (sqlite3:execute dbh "DETACH auxdb;"))))
+
+;; prefix is "" or "auxdb."
+;;
+;; (define (dbmod:last-update-patch dbh prefix)
+;; (let ((
+
+;;======================================================================
+;; Moved from dbfile
+;;======================================================================
+
)
Index: dcommon.scm
==================================================================
--- dcommon.scm
+++ dcommon.scm
@@ -27,10 +27,13 @@
(declare (unit dcommon))
(declare (uses gutils))
(declare (uses db))
+(declare (uses commonmod))
+
+(import commonmod)
;; (declare (uses synchash))
(include "megatest-version.scm")
(include "common_records.scm")
(include "db_records.scm")
@@ -635,11 +638,12 @@
(common:max (map cadr col-indices))))
(max-visible (max (- (dboard:tabdat-num-tests tabdat) 15) 3))
(max-col-vis (if (> max-col 10) 10 max-col))
(numrows 1)
(numcols 1))
- (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS")
+ (if (common:low-noise-print 60 "runs-stats-update-clear")
+ (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS"))
(iup:attribute-set! stats-matrix "NUMCOL" max-col )
(iup:attribute-set! stats-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20
(iup:attribute-set! stats-matrix "NUMCOL_VISIBLE" max-col-vis)
(iup:attribute-set! stats-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row))
;;(print "row-indices: " row-indices " col-indices: " col-indices)
@@ -704,11 +708,13 @@
#:numlin-visible 5
))
(colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
(updater (lambda ()
(if (dashboard:monitor-changed? commondat tabdat)
- (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10)))
+ (let ((servers (case (rmt:transport-mode)
+ ((http)(server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath* limit: 10)))
+ (else '()))))
(iup:attribute-set! servers-matrix "NUMLIN" (length servers))
;; (set! colnum 0)
;; (for-each (lambda (colname)
;; ;; (print "colnum: " colnum " colname: " colname)
;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
Index: diff-report.scm
==================================================================
--- diff-report.scm
+++ diff-report.scm
@@ -17,10 +17,12 @@
;;
(declare (unit diff-report))
(declare (uses common))
(declare (uses rmt))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(use matchable)
(use fmt)
(use ducttape-lib)
ADDED docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf
Index: docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf
==================================================================
--- /dev/null
+++ docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf
cannot compute difference between binary files
ADDED docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf
Index: docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf
==================================================================
--- /dev/null
+++ docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf
cannot compute difference between binary files
ADDED docs/reference/queues-dont-fix-overload.pdf
Index: docs/reference/queues-dont-fix-overload.pdf
==================================================================
--- /dev/null
+++ docs/reference/queues-dont-fix-overload.pdf
cannot compute difference between binary files
Index: ezsteps.scm
==================================================================
--- ezsteps.scm
+++ ezsteps.scm
@@ -27,10 +27,12 @@
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))
;; (declare (uses filedb))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
@@ -38,11 +40,11 @@
;;(rmt:get-test-info-by-id run-id test-id) -> testdat
;; TODO: deprecate me in favor of ezsteps.scm
;;
-(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)
+(define (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)
(let* ((stepname (car ezstep)) ;; do stuff to run the step
(stepinfo (cadr ezstep))
;; (let ((info (cadr ezstep)))
;; (if (proc? info) "" info)))
;; (stepproc (let ((info (cadr ezstep)))
@@ -63,11 +65,12 @@
(script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
(logpro-file (conc stepname ".logpro"))
(html-file (conc stepname ".html"))
(dat-file (conc stepname ".dat"))
(tconfig-logpro (configf:lookup testconfig "logpro" stepname))
- (logpro-used (common:file-exists? logpro-file)))
+ (logpro-used (common:file-exists? logpro-file))
+ (mtexepath (common:get-megatest-exe-path)))
(setenv "MT_STEP_NAME" stepname)
(hash-table-set! all-steps-dat stepname `((params . ,paramparts)))
(debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
@@ -96,11 +99,11 @@
(debug:print 4 *default-log-port* "script: " script)
(rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
;; now launch the actual process
(call-with-environment-variables
- (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
+ (list (cons "PATH" mtexepath))
(lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
(let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1
(pid #f))
(let ((proc (lambda ()
(set! pid (process-run "/bin/bash" (list "-c" cmd))))))
Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -153,11 +153,11 @@
(determine-proxy (constantly #f)))
(debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
(handle-exceptions
exn
(begin
- (print-error-message exn)
+ ;; (print-error-message exn)
(if (< portnum 64000)
(begin
(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 5 *default-log-port* "exn=" (condition->list exn))
@@ -241,25 +241,17 @@
(mutex-lock! *http-mutex*)
(set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))
;; Send "cmd" with json payload "params" to serverdat and receive result
;;
-(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
- (let* ((fullurl (if (vector? serverdat)
- (http-transport:server-dat-get-api-req serverdat)
- (begin
- (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
- (exit 1))))
+(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3))
+ (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat)
+ (let* ((fullurl (remote-api-req runremote))
(res (vector #f "uninitialized"))
(success #t)
(sparams (db:obj->string params transport: 'http))
- (runremote (or area-dat *runremote*))
- (server-id (if (vector? serverdat)
- (http-transport:server-dat-get-server-id serverdat)
- (begin
- (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
- (exit 1)))))
+ (server-id (remote-server-id runremote)))
(debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds))
;; set up the http-client here
(max-retry-attempts 1)
;; consider all requests indempotent
@@ -286,21 +278,13 @@
(debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
(debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
(debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
(debug:print 0 *default-log-port* " call-chain: " call-chain)))
;; what if another thread is communicating ok? Can't happen due to mutex
- (set! *runremote* #f)
- (set! runremote #f)
- ;; (if runremote
- ;; (remote-conndat-set! runremote #f))
- ;; Killing associated server to allow clean retry.")
- ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine?
+ (http-transport:close-connections runremote)
(mutex-unlock! *http-mutex*)
- ;; (signal (make-composite-condition
- ;; (make-property-condition 'commfail 'message "failed to connect to server")))
- ;; "communications failed"
- (close-all-connections!)
+ ;; (close-connection! fullurl)
(db:obj->string #f))
(with-input-from-request ;; was dat
fullurl
(list (cons 'key (or server-id "thekey"))
(cons 'cmd cmd)
@@ -345,67 +329,28 @@
'timeout
'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))
;; careful closing of connections stored in *runremote*
;;
-(define (http-transport:close-connections #!key (area-dat #f))
- (let* ((runremote (or area-dat *runremote*))
- (server-dat (if runremote
- (remote-conndat runremote)
- #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
- (if (vector? server-dat)
- (let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
- (handle-exceptions
+(define (http-transport:close-connections runremote)
+ (if (remote? runremote)
+ (let ((api-dat (remote-api-uri runremote)))
+ (handle-exceptions
exn
- (begin
- (print-call-chain *default-log-port*)
- (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
- (close-connection! api-dat)
- (close-idle-connections!)
- #t))
- #f)))
-
-
-(define (make-http-transport:server-dat)(make-vector 6))
-(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0))
-(define (http-transport:server-dat-get-port vec) (vector-ref vec 1))
-(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2))
-(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3))
-(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4))
-(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5))
-;(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6))
-(define (http-transport:server-dat-get-server-id vec) (vector-ref vec 6))
-
-(define (http-transport:server-dat-make-url vec)
- (if (and (http-transport:server-dat-get-iface vec)
- (http-transport:server-dat-get-port vec))
- (conc "http://"
- (http-transport:server-dat-get-iface vec)
- ":"
- (http-transport:server-dat-get-port vec))
- #f))
-
-(define (http-transport:server-dat-update-last-access vec)
- (if (vector? vec)
- (vector-set! vec 5 (current-seconds))
- (begin
- (print-call-chain (current-error-port))
- (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))
-
-;;
-;; connect
-;;
-(define (http-transport:client-connect iface port server-id)
- (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id)
- (let* ((api-url (conc "http://" iface ":" port "/api"))
- (api-uri (uri-reference (conc "http://" iface ":" port "/api")))
- (api-req (make-request method: 'POST uri: api-uri))
- (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
- server-dat))
-
-
-
+ (begin
+ (print-call-chain *default-log-port*)
+ (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
+ (if (args:any-defined? "-server" "-execute" "-run")
+ (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
+ (if api-dat (close-connection! api-dat))
+
+ ;; Would it be better to set *runremote* to #f? I don't think so. But we may
+ ;; need to clear more of the runremote fields
+ (remote-api-url-set! runremote #f) ;; used as a flag for connection up and running
+
+ #t))
+ #f))
;; run http-transport:keep-running in a parallel thread to monitor that the db is being
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running)
@@ -429,11 +374,11 @@
(set! sdat *server-info*)
(mutex-unlock! *heartbeat-mutex*)
(if (and sdat
(not changed)
(> (- (current-seconds) start-time) 2))
- (let* ((servinfodir (conc *toppath*"/.servinfo"))
+ (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo"))
(ipaddr (car sdat))
(port (cadr sdat))
(servinf (conc servinfodir"/"ipaddr":"port)))
(set! servinfofile servinf)
(if (not (file-exists? servinfodir))
@@ -448,34 +393,23 @@
(lambda ()
(delete-file* servinf))
*on-exit-procs*))
;; put data about this server into a simple flat file host.port
(debug:print-info 0 *default-log-port* "Received server alive signature")
- #;(common:save-pkt `((action . alive)
- (T . server)
- (pid . ,(current-process-id))
- (ipaddr . ,(car sdat))
- (port . ,(cadr sdat)))
- *configdat* #t)
sdat)
(begin
(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
(sleep 4)
(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
- (let* ((ipaddr (car sdat))
+ (if sdat
+ (let* ((ipaddr (car sdat))
(port (cadr sdat))
- (servinf (conc *toppath*"/.servinfo/"ipaddr":"port)))
- (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
- ;; (delete-file* servinf) ;; handled by on-exit, can be removed
- #;(common:save-pkt `((action . died)
- (T . server)
- (pid . ,(current-process-id))
- (ipaddr . ,(car sdat))
- (port . ,(cadr sdat))
- (msg . "Transport died?"))
- *configdat* #t)
+ (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port)))
+ (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
(exit))
+ (exit)
+ )
(loop start-time
(equal? sdat last-sdat)
sdat)))))))
(iface (car server-info))
(port (cadr server-info))
@@ -504,11 +438,12 @@
(if (and no-sync-db
(common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
(begin
(if (common:low-noise-print 120 "sync-all-print")
(debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")))
- (db:all-db-sync *dbstruct-dbs*))))
+ (db:all-db-sync *dbstruct-dbs*)
+ )))
;; when things go wrong we don't want to be doing the various queries too often
;; so we strive to run this stuff only every four seconds or so.
(let* ((sync-time (- (current-milliseconds) start-time))
(rem-time (quotient (- 4000 sync-time) 1000)))
@@ -560,22 +495,22 @@
(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
(let ((curr-time (current-seconds)))
(handle-exceptions
exn
(debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
- (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
+ (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
(not *server-overloaded*)
(file-exists? servinfofile))
(change-file-times servinfofile curr-time curr-time)))
- (if (or (common:low-noise-print 120 "start new server")
+ (if (and (common:low-noise-print 120 "start new server")
(> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
(begin
- (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...")
+ (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...")
(server:kind-run *toppath*)
(if (> *api-process-request-count* 100)
(begin
- (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile)
+ (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile)
(delete-file* servinfofile)))))))
(loop 0 server-state bad-sync-count (current-milliseconds)))
(else
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(http-transport:server-shutdown port)))))))
@@ -621,59 +556,36 @@
;; all routes though here end in exit ...
;;
;; start_server?
;;
(define (http-transport:launch)
- ;; check that a server start is in progress, pause or exit if so
- (let* ((tmp-area (common:get-db-tmp-area))
- (server-start (conc tmp-area "/.server-start"))
- (server-started (conc tmp-area "/.server-started"))
- (start-time (common:lazy-modification-time server-start))
- (started-time (common:lazy-modification-time server-started))
- (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
- (start-time-old (> (- (current-seconds) start-time) 5))
- (cleanup-proc (lambda (msg)
- (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
- (full-serv-fname (conc *toppath* "/logs/" serv-fname))
- (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname)))
- (debug:print 0 *default-log-port* msg)
- (if (common:file-exists? full-serv-fname)
- (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
- (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
- (exit)))))
- #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago
- (not server-starting))
- (begin
- (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting")
- (exit)))
- ;; lets not even bother to start if there are already three or more server files ready to go
- #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*))))
- (if (> num-alive 3)
- (begin
- (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
- (exit))))
- #;(common:save-pkt `((action . start)
- (T . server)
- (pid . ,(current-process-id)))
- *configdat* #t)
- (let* ((th2 (make-thread (lambda ()
- (debug:print-info 0 *default-log-port* "Server run thread started")
- (http-transport:run
- (if (args:get-arg "-server")
- (args:get-arg "-server")
- "-")
- )) "Server run"))
- (th3 (make-thread (lambda ()
- (debug:print-info 0 *default-log-port* "Server monitor thread started")
- (http-transport:keep-running)
- "Keep running"))))
- (thread-start! th2)
- (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
- (thread-start! th3)
- (set! *didsomething* #t)
- (thread-join! th2)
- (exit))))
+ ;; check the .servinfo directory, are there other servers running on this
+ ;; or another host?
+ (let* ((server-start-is-ok (server:minimal-check *toppath*)))
+ (if (not server-start-is-ok)
+ (begin
+ (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.")
+ (exit 1))))
+
+ ;; check that a server start is in progress, pause or exit if so
+ (let* ((th2 (make-thread (lambda ()
+ (debug:print-info 0 *default-log-port* "Server run thread started")
+ (http-transport:run
+ (if (args:get-arg "-server")
+ (args:get-arg "-server")
+ "-")
+ )) "Server run"))
+ (th3 (make-thread (lambda ()
+ (debug:print-info 0 *default-log-port* "Server monitor thread started")
+ (http-transport:keep-running)
+ "Keep running"))))
+ (thread-start! th2)
+ (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
+ (thread-start! th3)
+ (set! *didsomething* #t)
+ (thread-join! th2)
+ (exit)))
;; (define (http-transport:server-signal-handler signum)
;; (signal-mask! signum)
;; (handle-exceptions
;; exn
Index: index-tree.scm
==================================================================
--- index-tree.scm
+++ index-tree.scm
@@ -29,10 +29,12 @@
(declare (uses lock-queue))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
Index: items.scm
==================================================================
--- items.scm
+++ items.scm
@@ -21,10 +21,12 @@
;; (temperature "cool medium hot")
;; (season "summer winter fall spring")))
(declare (unit items))
(declare (uses common))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
;; Puts out all combinations
(define (process-itemlist hierdepth curritemkey itemlist)
Index: keys.scm
==================================================================
--- keys.scm
+++ keys.scm
@@ -24,10 +24,12 @@
(use sqlite3 srfi-1 posix regex regex-case srfi-69)
(import (prefix sqlite3 sqlite3:))
(declare (unit keys))
(declare (uses common))
+(declare (uses commonmod))
+(import commonmod)
(include "key_records.scm")
(include "common_records.scm")
(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ...
Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -29,18 +29,23 @@
(import (prefix sqlite3 sqlite3:))
(declare (unit launch))
(declare (uses subrun))
(declare (uses common))
+(declare (uses commonmod))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))
+(declare (uses dbfile))
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")
+
+(import commonmod
+ dbfile)
;;======================================================================
;; ezsteps
;;======================================================================
@@ -183,11 +188,11 @@
(tal (cdr ezstepslst))
(prevstep #f))
(debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
;; check exit-info (vector-ref exit-info 1)
(if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
- (let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat))
+ (let* ((logpro-used (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat))
(stepname (car ezstep))
(stepparms (hash-table-ref all-steps-dat stepname)))
(setenv "MT_STEP_NAME" stepname)
(pp (hash-table->alist all-steps-dat))
;; if logpro-used read in the stepname.dat file
@@ -205,11 +210,11 @@
)
))))))
(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
- (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
+ (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "60")))
(start-seconds (current-seconds))
(calc-minutes (lambda ()
(inexact->exact
(round
(-
@@ -239,13 +244,13 @@
(> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
df
#f)))
(do-sync (or new-cpu-load new-disk-free over-time))
- (test-info (rmt:get-test-info-by-id run-id test-id))
- (state (db:test-get-state test-info))
- (status (db:test-get-status test-info))
+ (test-info (rmt:get-test-state-status-by-id run-id test-id))
+ (state (car test-info));; (db:test-get-state test-info))
+ (status (cdr test-info));; (db:test-get-status test-info))
(kill-reason "no kill reason specified")
(kill-job? #f))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
(cond
((test-get-kill-request run-id test-id)
@@ -259,11 +264,12 @@
(rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
(set! kill-job? #f)))
(debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
- (launch:handle-zombie-tests run-id)
+ (if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty
+ (launch:handle-zombie-tests run-id))
(when do-sync
;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
@@ -314,11 +320,11 @@
(debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
(tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
)))
(mutex-unlock! m)
;; no point in sticking around. Exit now. But run end of run before exiting?
- (launch:end-of-run-check run-id)
+ (launch:end-of-run-check run-id)
(exit)))
(if (hash-table-ref/default misc-flags 'keep-going #f)
(begin
(thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
(if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
@@ -765,20 +771,28 @@
;; new
;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;; 0 RUNNING ==> this is actually the first condition, should not get here
-
+(define *last-rollup* 0)
(define (launch:end-of-run-check run-id )
(let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))
- (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
+ (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
(all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
- (current-state (rmt:get-run-state run-id))
- (current-status (rmt:get-run-status run-id)))
- ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing
- (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)
- (rmt:set-state-status-and-roll-up-run run-id current-state current-status)
+ (current-state-status (rmt:get-run-state-status run-id))
+ (current-state (car current-state-status)) ;; (rmt:get-run-state run-id))
+ (current-status (cdr current-state-status))) ;; (rmt:get-run-status run-id)))
+ ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing
+ (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)
+ ;;
+ ;; TODO: add a final rollup when run is done (if there isn't one already)
+ ;;
+ (if (or (< running-cnt 3) ;; have only few running
+ (> (- (current-seconds) *last-rollup*) 10)) ;; or haven't rolled up in past ten seconds
+ (begin
+ (rmt:set-state-status-and-roll-up-run run-id current-state current-status)
+ (set! *last-rollup* (current-seconds))))
(runs:update-junit-test-reporter-xml run-id)
(cond
((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
(if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id)))
(begin
@@ -1131,11 +1145,14 @@
(setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
(begin
(debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
(set! *toppath* #f) ;; force it to be false so we return #f
#f))
-
+
+ ;; needed by various transport and db modules
+ (dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*))
+
;; one more attempt to cache the configs for future reading
(let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
(mtcachef (car cachefiles))
(rccachef (cdr cachefiles)))
@@ -1567,11 +1584,11 @@
;; (list 'serverinf *server-info*)
#;(list 'homehost (let* ((hhdat (server:get-homehost)))
(if hhdat
(car hhdat)
#f)))
- (list 'serverurl (if *runremote*
+ #;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED
(remote-server-url *runremote*)
#f)) ;;
(list 'areaname (common:get-testsuite-name))
(list 'toppath *toppath*)
(list 'work-area work-area)
Index: lock-queue.scm
==================================================================
--- lock-queue.scm
+++ lock-queue.scm
@@ -19,10 +19,12 @@
(use (prefix sqlite3 sqlite3:) srfi-18)
(declare (unit lock-queue))
(declare (uses common))
(declare (uses tasks))
+(declare (uses commonmod))
+(import commonmod)
;;======================================================================
;; attempt to prevent overlapping updates of rollup files by queueing
;; update requests in an sqlite db
;;======================================================================
Index: margs.scm
==================================================================
--- margs.scm
+++ margs.scm
@@ -23,10 +23,20 @@
(define (args:get-arg arg . default)
(if (null? default)
(hash-table-ref/default args:arg-hash arg #f)
(hash-table-ref/default args:arg-hash arg (car default))))
+
+;; get an arg as a number
+(define (args:get-arg-number arg . default)
+ (let* ((val-str (args:get-arg arg))
+ (val (if val-str (string->number val-str) #f)))
+ (if val
+ val
+ (if (null? default)
+ #f
+ default))))
(define (args:any? . args)
(not (null? (filter (lambda (x) x)
(map args:get-arg args)))))
Index: megatest-version.scm
==================================================================
--- megatest-version.scm
+++ megatest-version.scm
@@ -18,6 +18,6 @@
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
;; (declare (unit megatest-version))
-(define megatest-version 1.7009)
+(define megatest-version 1.8007)
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -23,10 +23,18 @@
(define (toplevel-command . a) #f)
(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses margs))
+(declare (uses mtargs))
+(declare (uses mtargs.import))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
+(declare (uses mtargs))
+(declare (uses mtargs.import))
+(declare (uses debugprint))
+(declare (uses debugprint.import))
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
@@ -41,27 +49,29 @@
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses db))
+(declare (uses dbfile))
+(declare (uses dbfile.import))
(declare (uses dbmod))
(declare (uses dbmod.import))
-(declare (uses commonmod))
-(declare (uses commonmod.import))
-(declare (uses dbfile))
-(declare (uses dbfile.import))
+(declare (uses tcp-transportmod))
+(declare (uses tcp-transportmod.import))
;; (declare (uses debugprint))
;; (declare (uses debugprint.import))
-;; (declare (uses mtargs))
-;; (declare (uses mtargs.import))
;; (declare (uses ftail))
;; (import ftail)
-(import dbmod
+(import mtargs
+ debugprint
+ dbmod
commonmod
- dbfile)
+ dbfile
+ tcp-transportmod
+ )
(define *db* #f) ;; this is only for the repl, do not use in general!!!!
(include "common_records.scm")
(include "key_records.scm")
@@ -69,11 +79,11 @@
(include "run_records.scm")
(include "megatest-fossil-hash.scm")
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
readline apropos json http-client directory-utils typed-records
- http-client srfi-18 extras format)
+ http-client srfi-18 extras format tcp-server tcp)
;; Added for csv stuff - will be removed
;;
(use sparse-vectors)
@@ -80,10 +90,14 @@
(require-library mutils)
(define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file
(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file
+;; set some parameters here - these need to be put in something that can be loaded from other
+;; executables such as dashboard and mtutil
+;;
+(include "transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)
;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
@@ -373,10 +387,11 @@
"-load" ;; load and exectute a scheme file
"-section"
"-var"
"-dumpmode"
"-run-id"
+ "-db"
"-ping"
"-refdb2dat"
"-o"
"-log"
"-sync-log"
@@ -585,16 +600,16 @@
;; where (launch:setup) returns #f?
;;
(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
(handle-exceptions
exn
- (begin
- (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
- )
+ (begin
+ (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
(let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
+ (dbname (args:get-arg "-db")) ;; for the server logfile name
(logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
- (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log")))
+ (conc tl "/logs/server-"(or dbname "unk")"-"(current-process-id) "-" (get-host-name) ".log")))
(oup (open-logfile logf)))
(if (not (args:get-arg "-log"))
(hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
(debug:print-info 0 *default-log-port* "Sending log output to " logf)
(set! *default-log-port* oup))))
@@ -654,23 +669,10 @@
;; for some switches always print the command to stderr
;;
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
(debug:print 0 *default-log-port* (string-intersperse (argv) " ")))
-;; some switches imply homehost. Exit here if not on homehost
-;;
-(let ((homehost-required (list "-cleanup-db")))
- (if (apply args:any? homehost-required)
- (if (not (server:choose-server *toppath* 'home?))
- (for-each
- (lambda (switch)
- (if (args:get-arg switch)
- (begin
- (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch
- ", you can move homehost by removing the .homehost file but this will disrupt any runs in progress.")
- (exit 1))))
- homehost-required))))
;;======================================================================
;; Misc setup stuff
;;======================================================================
@@ -934,13 +936,24 @@
;;======================================================================
;; Server? Start up here.
;;
(if (args:get-arg "-server")
- (let ((tl (launch:setup))
- (transport-type (string->symbol (or (args:get-arg "-transport") "http"))))
- (server:launch 0 transport-type)
+ (let* (;; (run-id (args:get-arg "-run-id"))
+ (dbfname (args:get-arg "-db"))
+ (tl (launch:setup))
+ (keys (keys:config-get-fields *configdat*)))
+ (case (rmt:transport-mode)
+ ((http)(http-transport:launch))
+ ((tcp)
+ (debug:print 0 *default-log-port* "INFO: Running using tcp method.")
+ (if dbfname
+ (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys)
+ (begin
+ (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.")
+ (exit 1))))
+ (else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode))))
(set! *didsomething* #t)))
;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
@@ -953,20 +966,26 @@
(args:get-arg "-kill-servers"))
(let ((tl (launch:setup)))
(if tl ;; all roads from here exit
(let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
(fmtstr "~33a~22a~20a~20a~8a\n"))
- (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
- (format #t fmtstr "==" "=========" "=========" "========" "=====")
- (for-each ;; ( mod-time host port start-time pid )
+ (if (not servers)
+ (begin
+ (debug:print-info 1 *default-log-port* "No servers found")
+ (exit)
+ )
+ )
+ (format #t fmtstr "PID" "host:port" "age (hms)" "Last mod" "State")
+ (format #t fmtstr "===" "=========" "=========" "========" "=====")
+ (for-each ;; (ip-addr port? mod-time host port start-time pid )
(lambda (server)
- (let* ((mtm (any->number (car server)))
+ (let* ((mtm (any->number (caddr server)))
(mod (if mtm (- (current-seconds) mtm) "unk"))
- (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
- (url (conc (cadr server) ":" (caddr server)))
+ (age (- (current-seconds)(or (any->number mtm) (current-seconds))))
(pid (list-ref server 4))
- (alv (if (number? mod)(< mod 10) #f)))
+ (url (conc (car server) ":" (cadr server)))
+ (alv (if (number? mod)(< mod 360) #f)))
(format #t
fmtstr
pid
url
(seconds->hr-min-sec age)
@@ -979,11 +998,10 @@
(server:kill server)))))
(sort servers (lambda (a b)
(let ((ma (or (any->number (car a)) 9e9))
(mb (or (any->number (car b)) 9e9)))
(> ma mb)))))
- ;; (debug:print-info 1 *default-log-port* "Done with listservers")
(set! *didsomething* #t)
(exit))
(exit))))
;; must do, would have to add checks to many/all calls below
@@ -1383,12 +1401,11 @@
;; IDEA: megatest list -runname blah% ...
;;
(if (or (args:get-arg "-list-runs")
(args:get-arg "-list-db-targets"))
(if (launch:setup)
- (let* (;; (dbstruct (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local")))
- (runpatt (args:get-arg "-list-runs"))
+ (let* ((runpatt (args:get-arg "-list-runs"))
(access-mode (db:get-access-mode))
(testpatt (common:args-get-testpatt #f))
;; (if (args:get-arg "-testpatt")
;; (args:get-arg "-testpatt")
;; "%"))
@@ -1433,10 +1450,15 @@
db:test-record-fields
t)))
(adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) ;; '("id"))))
(steps-spec (alist-ref "steps" fields-spec equal?))
(test-field-index (make-hash-table)))
+ (if (and (args:get-arg "-dumpmode")
+ (not (member (args:get-arg "-dumpmode") '("sexpr" "json" "ods" "list"))))
+ (begin
+ (debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
+ (exit)))
(if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec
(let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec)))
(if (null? invalid-tests-spec)
;; generate the lookup map test-field-name => index-number
(let loop ((hed (car adj-tests-spec))
@@ -1488,11 +1510,11 @@
;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header "id")) targetstr runname "meta" "id" )
;; (mutils:hierhash-set! data (db:get-value-by-header run header "event_time") targetstr runname "meta" "event_time" )
;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment") targetstr runname "meta" "comment" )
;; ;; add last entry twice - seems to be a bug in hierhash?
;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment") targetstr runname "meta" "comment" )
- (else
+ ((#f list)
(if (null? runs-spec)
(print "Run: " targetstr "/" runname
" status: " (db:get-value-by-header run header "state")
" run-id: " run-id ", number tests: " (length tests)
" event_time: " (db:get-value-by-header run header "event_time"))
@@ -1504,11 +1526,14 @@
(lambda (field-name)
(if (equal? field-name "target")
(display (conc "target: " targetstr " "))
(display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " "))))
runs-spec)
- (newline)))))
+ (newline))))
+ (else
+ (debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
+ ))
(for-each
(lambda (test)
(common:debug-handle-exceptions #f
exn
@@ -2054,11 +2079,11 @@
(if (args:get-arg "-extract-ods")
(general-run-call
"-extract-ods"
"Make ods spreadsheet"
(lambda (target runname keys keyvals)
- (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: #t))
+ (let ((dbstruct (make-dbr:dbstruct areapath: *toppath* local: #t))
(outputfile (args:get-arg "-extract-ods"))
(runspatt (or (args:get-arg "-runname")(args:get-arg ":runname")))
(pathmod (args:get-arg "-pathmod")))
;; (keyvalalist (keys->alist keys "%")))
(debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals)
@@ -2321,10 +2346,16 @@
(begin
(if (not (launch:setup))
(begin
(debug:print 0 *default-log-port* "Failed to setup, exiting")
(exit 1)))
+
+;; (if (not (server:choose-server *toppath* 'home?))
+;; (begin
+;; (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db")
+;; (exit 1)))
+
(let ((dbstructs (db:setup #f)))
(common:cleanup-db dbstructs))
(set! *didsomething* #t)))
(if (args:get-arg "-mark-incompletes")
@@ -2379,11 +2410,13 @@
(if (or (getenv "MT_RUNSCRIPT")
(args:get-arg "-repl")
(args:get-arg "-load"))
(let* ((toppath (launch:setup))
(dbstructs (if (and toppath
- (server:choose-server toppath 'home?))
+ ;; NOTE: server:choose-server is starting a server
+ ;; either add equivalent for tcp mode or ????
+ #;(server:choose-server toppath 'home?))
(db:setup #t)
#f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
(if *toppath*
(cond
((getenv "MT_RUNSCRIPT")
Index: mlaunch.scm
==================================================================
--- mlaunch.scm
+++ mlaunch.scm
@@ -28,6 +28,8 @@
(use sqlite3 srfi-1 posix regex regex-case srfi-69 format)
(declare (unit mlaunch))
(declare (uses db))
(declare (uses common))
+(declare (uses commonmod))
+(import commonmod)
Index: monitor.scm
==================================================================
--- monitor.scm
+++ monitor.scm
@@ -23,10 +23,12 @@
(declare (unit runs))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
ADDED mtargs/mtargs.egg
Index: mtargs/mtargs.egg
==================================================================
--- /dev/null
+++ mtargs/mtargs.egg
@@ -0,0 +1,7 @@
+((license "LGPL")
+ (version 0.1)
+ (category misc)
+ (dependencies srfi-69 srfi-1)
+ (author "Matt Welland")
+ (synopsis "Primitive argument processor.")
+ (components (extension mtargs)))
Index: mtargs/mtargs.scm
==================================================================
--- mtargs/mtargs.scm
+++ mtargs/mtargs.scm
@@ -18,28 +18,45 @@
(module mtargs
(
arg-hash
get-arg
+ get-arg-number
get-arg-from
- usage
get-args
+ usage
print-args
any-defined?
- help
- )
-
-(import scheme chicken data-structures extras posix ports files)
-(use srfi-69 srfi-1)
-
-(define arg-hash (make-hash-table))
-(define help "")
+ )
+
+(import scheme) ;; gives us cond-expand in chicken-4
+
+(cond-expand
+ (chicken-5
+ (import scheme (chicken base) (chicken port) (chicken file) (chicken process-context))
+ (import srfi-69 srfi-1))
+ (chicken-4
+ (import chicken posix srfi-69 srfi-1))
+ (else))
+
+(define usage (make-parameter print))
+(define arg-hash (make-hash-table))
(define (get-arg arg . default)
(if (null? default)
(hash-table-ref/default arg-hash arg #f)
(hash-table-ref/default arg-hash arg (car default))))
+
+;; get an arg as a number
+(define (get-arg-number arg . default)
+ (let* ((val-str (get-arg arg))
+ (val (if val-str (string->number val-str) #f)))
+ (if val
+ val
+ (if (null? default)
+ #f
+ default))))
(define (any-defined? . args)
(not (null? (filter (lambda (x) x)
(map get-arg args)))))
@@ -48,28 +65,10 @@
(define (get-arg-from ht arg . default)
(if (null? default)
(hash-table-ref/default ht arg #f)
(hash-table-ref/default ht arg (car default))))
-(define (usage . args)
- (if (> (length args) 0)
- (apply print "ERROR: " args))
- (if (string? help)
- (print help)
- (print "Usage: " (car (argv)) " ... "))
- (exit 0))
-
- ;; one-of args defined
-(define (any-defined? . param)
- (let ((res #f))
- (for-each
- (lambda (arg)
- (if (get-arg arg)(set! res #t)))
- param)
- res))
-
-;; args:
(define (get-args args params switches arg-hash num-needed)
(let* ((numtargs (length args))
(adj-num-needed (if num-needed (+ num-needed 2) #f)))
(if (< numtargs (if adj-num-needed adj-num-needed 2))
(if (>= num-needed 1)
@@ -94,13 +93,12 @@
(else
(if (null? tail)(append remtargs (list arg)) ;; return the non-used args
(loop (car tail)(cdr tail)(append remtargs (list arg))))))))
))
-(define (print-args remtargs arg-hash)
- (print "ARGS: " remtargs)
+(define (print-args arg-hash)
(for-each (lambda (arg)
(print " " arg " " (hash-table-ref/default arg-hash arg #f)))
(hash-table-keys arg-hash)))
)
Index: mtexec.scm
==================================================================
--- mtexec.scm
+++ mtexec.scm
@@ -29,10 +29,12 @@
;; (declare (uses common))
(declare (uses margs))
(declare (uses configf))
;; (declare (uses rmt))
+(declare (uses commonmod))
+(import commonmod)
;; (use ducttape-lib)
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
Index: mtut.scm
==================================================================
--- mtut.scm
+++ mtut.scm
@@ -30,10 +30,13 @@
(declare (uses common))
(declare (uses margs))
(declare (uses configf))
;; (declare (uses rmt))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
+(import commonmod)
(use ducttape-lib)
(include "megatest-fossil-hash.scm")
Index: newdashboard.scm
==================================================================
--- newdashboard.scm
+++ newdashboard.scm
@@ -29,10 +29,12 @@
(prefix dbi dbi:))
(declare (uses common))
(declare (uses megatest-version))
(declare (uses margs))
+(declare (uses commonmod))
+(import commonmod)
;; (declare (uses launch))
;; (declare (uses gutils))
;; (declare (uses db))
;; (declare (uses server))
Index: ods.scm
==================================================================
--- ods.scm
+++ ods.scm
@@ -17,10 +17,12 @@
;;
(use csv-xml regex)
(declare (unit ods))
(declare (uses common))
+(declare (uses commonmod))
+(import commonmod)
(define ods:dirs
'("Configurations2"
"Configurations2/toolpanel"
"Configurations2/menubar"
Index: portlogger.scm
==================================================================
--- portlogger.scm
+++ portlogger.scm
@@ -64,11 +64,11 @@
exn
(begin
;; (release-dot-lock fname)
(debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
- (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+ ;; (debug:print 5 *default-log-port* "exn=" (condition->list exn))
(if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it
(print-call-chain (current-error-port)))
(let* (;; (lock (obtain-dot-lock fname 2 9 10))
(db (portlogger:open-db fname))
(res (apply proc db params)))
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -21,16 +21,32 @@
(use format typed-records) ;; RADT => purpose of json format??
(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
+(declare (uses commonmod))
(declare (uses dbfile))
+;; (declare (uses dbmemmod))
+(declare (uses dbmod))
+(declare (uses tcp-transportmod))
(include "common_records.scm")
;; (declare (uses rmtmod))
+;; used by http-transport
(import dbfile) ;; rmtmod)
+(import commonmod
+;; dbmemmod
+ dbfile
+ dbmod
+ tcp-transportmod)
+
+;; http - use the old http + in /tmp db
+;; tcp - use tcp transport with inmem db
+;; nfs - use direct to disk access (read-only)
+;;
+(define rmt:transport-mode (make-parameter 'http))
;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;
;; generate entries for ~/.megatestrc with the following
@@ -42,61 +58,63 @@
;;======================================================================
;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
-(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
- (let* ((runremote (or area-dat *runremote*))
- (cinfo (if (remote? runremote)
- (remote-conndat runremote)
+(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
+ (let* ((cinfo (if (and (remote? runremote)
+ (remote-api-url runremote)) ;; we have a connection
+ runremote
#f)))
- (if cinfo
- cinfo
- (if (server:check-if-running areapath)
- (client:setup areapath)
- #f))))
+ (if cinfo
+ cinfo
+ (if (server:check-if-running areapath)
+ (client:setup areapath runremote)
+ #f))))
(define (rmt:on-homehost? runremote)
(let* ((hh-dat (remote-hh-dat runremote)))
(if (pair? hh-dat)
(cdr hh-dat)
(begin
(debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
#f))))
+(define (make-and-init-remote areapath)
+ (case (rmt:transport-mode)
+ ((http)(make-remote))
+ ((tcp) (tt:make-remote areapath))
+ (else #f)))
;;======================================================================
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
-
- #;(common:telemetry-log (conc "rmt:"(->string cmd))
- payload: `((rid . ,rid)
- (params . ,params)))
-
+ (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.")
(if (> attemptnum 2)
(debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
-
+
(cond
((> attemptnum 2) (thread-sleep! 0.05))
((> attemptnum 10) (thread-sleep! 0.5))
((> attemptnum 20) (thread-sleep! 1)))
+
+ ;; I'm turning this off, it may make sense to move it
+ ;; into http-transport-handler
(if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))
- (begin (server:run *toppath*) (thread-sleep! 3)))
-
-
- ;;DOT digraph megatest_state_status {
- ;;DOT ranksep=0;
- ;;DOT // rankdir=LR;
- ;;DOT node [shape="box"];
- ;;DOT "rmt:send-receive" -> MUTEXLOCK;
- ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
- ;; do all the prep locked under the rmt-mutex
- (mutex-lock! *rmt-mutex*)
+ (begin
+ (debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.")
+ (case (rmt:transport-mode)
+ ((http)
+ (server:run *toppath*)
+ (thread-sleep! 3))
+ (else
+ (thread-sleep! 1) ;; for tcp the server is started by routines in tcp-transportmod. For nfs there is no server
+ ))))
;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
;; 3. do the query, if on homehost use local access
;;
@@ -103,249 +121,192 @@
(let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
(areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
(runremote (or area-dat
*runremote*))
(attemptnum (+ 1 attemptnum))
- (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
-
- ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
- ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
- ;; ensure we have a record for our connection for given area
- (if (not runremote) ;; can remove this one. should never get here.
- (begin
- (set! *runremote* (make-remote))
- (let* ((server-info (remote-server-info *runremote*)))
- (if server-info
- (begin
- (remote-server-url-set! *runremote* (server:record->url server-info))
- (remote-server-id-set! *runremote* (server:record->id server-info)))))
- (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
-
- ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
- ;; DOT SET_HOMEHOST -> MUTEXLOCK;
- ;; ensure we have a homehost record
- (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
- (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
- (let ((hh-data (server:choose-server areapath 'homehost)))
- (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
-
- ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
- (cond
- #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
- (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
- (set! *runremote* #f)
- ;; BUG: close-connections should go here?
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
-
- ;;DOT EXIT;
- ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
- ;; give up if more than 150 attempts
- ((> attemptnum 150)
- (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
- (exit 1))
-
- ;;DOT CASE2 [label="local\nreadonly\nquery"];
- ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
- ;;DOT CASE2 -> "rmt:open-qry-close-locally";
- ;; readonly mode, read request- handle it - case 2
- ((and readonly-mode
- (member cmd api:read-only-queries))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
- (rmt:open-qry-close-locally cmd 0 params)
- )
-
- ;;DOT CASE3 [label="write in\nread-only mode"];
- ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
- ;;DOT CASE3 -> "#f";
- ;; readonly mode, write request. Do nothing, return #f
- (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
-
- ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
- ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
- ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
- ;;
- ;;DOT CASE4 [label="reset\nconnection"];
- ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
- ;;DOT CASE4 -> "rmt:send-receive";
- ;; reset the connection if it has been unused too long
- ((and runremote
- (remote-conndat runremote)
- (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
- (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
- (remote-server-timeout runremote))))
- (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
- (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
- (http-transport:close-connections area-dat: runremote)
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE5 [label="local\nread"];
- ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
- ;;DOT CASE5 -> "rmt:open-qry-close-locally";
-
- ;; on homehost and this is a read
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (rmt:on-homehost? runremote)
- (member cmd api:read-only-queries)) ;; this is a read
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE6 [label="init\nremote"];
- ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
- ;;DOT CASE6 -> "rmt:send-receive";
- ;; on homehost and this is a write, we already have a server, but server has died
-
- ;; reinstate this keep-alive section but inject a time condition into the (add ...
-
- #;((and (cdr (remote-hh-dat runremote)) ;; on homehost
- (not (member cmd api:read-only-queries)) ;; this is a write
- (remote-server-url runremote) ;; have a server
- (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
- (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
- (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
- (set! *runremote* (make-remote))
- (let* ((server-info (remote-server-info *runremote*)))
- (if server-info
- (begin
- (remote-server-url-set! *runremote* (server:record->url server-info))
- (remote-server-id-set! *runremote* (server:record->id server-info)))))
- (remote-force-server-set! runremote (common:force-server?))
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
- (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE7 [label="homehost\nwrite"];
- ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
- ;;DOT CASE7 -> "rmt:open-qry-close-locally";
- ;; on homehost and this is a write, we already have a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; on homehost
- (not (member cmd api:read-only-queries)) ;; this is a write
- (remote-server-url runremote)) ;; have a server
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
- (rmt:open-qry-close-locally cmd 0 params))
-
- ;;DOT CASE8 [label="force\nserver"];
- ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
- ;;DOT CASE8 -> "rmt:open-qry-close-locally";
- ;; on homehost, no server contact made and this is a write, passively start a server
- ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
- (cdr (remote-hh-dat runremote)) ;; have homehost
- (not (remote-server-url runremote)) ;; no connection yet
- (not (member cmd api:read-only-queries))) ;; not a read-only query
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
- (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
- (if server-info
- (begin
- (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
- (remote-server-id-set! runremote (server:record->id server-info)))
- (if (common:force-server?)
- (server:start-and-wait *toppath*)
- (server:kind-run *toppath*)))
+ (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))
+ (testsuite (common:get-testsuite-name))
+ (mtexe (common:find-local-megatest)))
+
+ (case (rmt:transport-mode)
+ ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))
+ ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
+ ((nfs) (nfs:transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
+ )))
+
+(define (nfs:transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
+ (let* ((keys (common:get-fields *configdat*))
+ (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath)))
+ (api:dispatch-request dbstruct cmd run-id params)))
+
+(define (tcp-transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
+ (if (not runremote)
+ (let* ((newremote (make-and-init-remote areapath)))
+ (set! *runremote* newremote)
+ (set! runremote newremote)))
+ (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id)))
+ (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))
+
+(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)
+ ;; do all the prep locked under the rmt-mutex
+ (mutex-lock! *rmt-mutex*)
+
+ ;; ensure we have a record for our connection for given area
+ (if (not runremote) ;; can remove this one. should never get here.
+ (begin
+ (set! *runremote* (make-and-init-remote areapath))
+ (let* ((server-info (remote-server-info *runremote*)))
+ (if server-info
+ (begin
+ (remote-server-url-set! *runremote* (server:record->url server-info))
+ (remote-server-id-set! *runremote* (server:record->id server-info)))))
+ (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
+
+ ;; ensure we have a homehost record
+ (if (or (not (pair? (remote-hh-dat runremote))) ;; not on homehost
+ (not (cdr (remote-hh-dat runremote)))) ;; not on homehost
+ (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
+ (let ((hh-data (server:choose-server areapath 'homehost)))
+ (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
+
+ (cond
+ ;; give up if more than 150 attempts
+ ((> attemptnum 150)
+ (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
+ (exit 1))
+
+ ;; readonly mode, read request- handle it - case 2
+ ((and readonly-mode
+ (member cmd api:read-only-queries))
+ (mutex-unlock! *rmt-mutex*)
+ (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
+ (rmt:open-qry-close-locally cmd 0 params)
+ )
+
+ ;; readonly mode, write request. Do nothing, return #f
+ (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
+
+ ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
+ ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
+ ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
+ ;;
+ ;; reset the connection if it has been unused too long
+ ((and runremote
+ (remote-api-url runremote)
+ (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
+ (+ (remote-last-access runremote)
+ (remote-server-timeout runremote))))
+ (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses in " (remote-server-timeout runremote) " seconds, forcing new connection.")
+ (http-transport:close-connections runremote)
+ ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
+ ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
+ (mutex-unlock! *rmt-mutex*)
+ (rmt:send-receive cmd rid params attemptnum: attemptnum))
+
+ ;; on homehost and this is a read
+ ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+ (rmt:on-homehost? runremote)
+ (member cmd api:read-only-queries)) ;; this is a read
+ (mutex-unlock! *rmt-mutex*)
+ (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
+ (rmt:open-qry-close-locally cmd 0 params))
+
+ ;; on homehost and this is a write, we already have a server
+ ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+ (cdr (remote-hh-dat runremote)) ;; on homehost
+ (not (member cmd api:read-only-queries)) ;; this is a write
+ (remote-server-url runremote)) ;; have a server (needed to sync written data back)
+ (mutex-unlock! *rmt-mutex*)
+ (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
+ (rmt:open-qry-close-locally cmd 0 params))
+
+ ;; on homehost, no server contact made and this is a write, passively start a server
+ ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
+ (cdr (remote-hh-dat runremote)) ;; have homehost
+ (not (remote-server-url runremote)) ;; no connection yet
+ (not (member cmd api:read-only-queries))) ;; not a read-only query
+ (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
+ (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
+ (if server-info
+ (begin
+ (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
+ (remote-server-id-set! runremote (server:record->id server-info)))
+ (if (common:force-server?)
+ (server:start-and-wait *toppath*)
+ (server:kind-run *toppath*)))
(remote-force-server-set! runremote (common:force-server?))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
(rmt:open-qry-close-locally cmd 0 params)))
- ;;DOT CASE9 [label="force server\nnot on homehost"];
- ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
- ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
- ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
- (not (remote-conndat runremote)))
- (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
- (not (remote-conndat runremote)))) ;; and no connection
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
- (mutex-unlock! *rmt-mutex*)
- (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
- (server:start-and-wait *toppath*))
- (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
- (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
-
- ;;DOT CASE10 [label="on homehost"];
- ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
- ;;DOT CASE10 -> "rmt:open-qry-close-locally";
- ;; all set up if get this far, dispatch the query
- ((and (not (remote-force-server runremote))
- (cdr (remote-hh-dat runremote))) ;; we are on homehost
- (mutex-unlock! *rmt-mutex*)
- (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
- (rmt:open-qry-close-locally cmd (if rid rid 0) params))
-
- ;;DOT CASE11 [label="send_receive"];
- ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
- ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
- ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
- ;; not on homehost, do server query
- (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
- ;;DOT }
-
-;; No Title
-;; Error: (vector-ref) out of range
-;; #(# (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299)))
-;; 6
-;;
-;; Call history:
-;;
-;; http-transport.scm:306: thread-terminate!
-;; http-transport.scm:307: debug:print-info
-;; common_records.scm:235: debug:debug-mode
-;; rmt.scm:259: k587
-;; rmt.scm:259: g591
-;; rmt.scm:276: http-transport:server-dat-update-last-access
-;; http-transport.scm:364: current-seconds
-;; rmt.scm:282: debug:print-info
-;; common_records.scm:235: debug:debug-mode
-;; rmt.scm:283: mutex-unlock!
-;; rmt.scm:287: extras-transport-succeded <--
-;; +-----------------------------------------------------------------------------+
-;; | Exit Status : 70
-;;
+ ;;DOT CASE9 [label="force server\nnot on homehost"];
+ ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
+ ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
+ ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
+ (not (remote-api-url runremote)))
+ (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
+ (not (remote-api-url runremote)))) ;; and no connection
+ (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " runremote: " (remote->alist runremote))
+ (mutex-unlock! *rmt-mutex*)
+ (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
+ (server:start-and-wait *toppath*))
+ ;; was: (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
+ (set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
+ (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
+
+ ;;DOT CASE10 [label="on homehost"];
+ ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
+ ;;DOT CASE10 -> "rmt:open-qry-close-locally";
+ ;; all set up if get this far, dispatch the query
+ ((and (not (remote-force-server runremote))
+ (cdr (remote-hh-dat runremote))) ;; we are on homehost
+ (mutex-unlock! *rmt-mutex*)
+ (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
+ (rmt:open-qry-close-locally cmd (if rid rid 0) params))
+
+ ;;DOT CASE11 [label="send_receive"];
+ ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
+ ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
+ ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
+ ;; not on homehost, do server query
+ (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))
;; bunch of small functions factored out of send-receive to make debug easier
;;
(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
;; (mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
;; (mutex-lock! *rmt-mutex*)
- (let* ((conninfo (remote-conndat runremote))
- (dat-in (case (remote-transport runremote)
- ((http) (condition-case ;; handling here has
- ;; caused a lot of
- ;; problems. However it
- ;; is needed to deal with
- ;; attemtped
- ;; communication to
- ;; servers that have gone
- ;; away
- (http-transport:client-api-send-receive 0 conninfo cmd params)
- ((servermismatch) (vector #f "Server id mismatch" ))
- ((commfail)(vector #f "communications fail"))
- ((exn)(vector #f "other fail" (print-call-chain)))))
- (else
- (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
- (exit))))
+ (let* (;; (conninfo (remote-conndat runremote))
+ (dat-in (condition-case ;; handling here has
+ ;; caused a lot of
+ ;; problems. However it
+ ;; is needed to deal with
+ ;; attemtped
+ ;; communication to
+ ;; servers that have gone
+ ;; away
+ (http-transport:client-api-send-receive 0 runremote cmd params)
+ ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
+ ((servermismatch) (vector #f "Server id mismatch" ))
+ ((commfail)(vector #f "communications fail"))
+ ((exn)(vector #f "other fail" (print-call-chain)))))
(dat (if (and (vector? dat-in) ;; ... check it is a correct size
(> (vector-length dat-in) 1))
dat-in
(vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
(success (if (vector? dat) (vector-ref dat 0) #f))
(res (if (vector? dat) (vector-ref dat 1) #f)))
- (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
- (http-transport:server-dat-update-last-access conninfo) ;; refresh access time
+ (if (and (remote? runremote)
+ (remote-api-url runremote)) ;; (and (vector? conninfo) (< 5 (vector-length conninfo)))
+ (remote-last-access-set! runremote (current-seconds)) ;; refresh access time
(begin
- (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
- (set! conninfo #f)
- (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global.
- (http-transport:close-connections area-dat: runremote)))
- (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
+ (debug:print 0 *default-log-port* "INFO: Should not get here! runremote="(remote->alist runremote))
+ ;; (set! conninfo #f)
+ (http-transport:close-connections runremote)))
+ (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. runremote=" (remote->alist runremote) " dat=" dat " runremote = " runremote)
(mutex-unlock! *rmt-mutex*)
(if success ;; success only tells us that the transport was
;; successful, have to examine the data to see if
;; there was a detected issue at the other end
(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
@@ -392,11 +353,11 @@
res))
(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
(let* ((qry-is-write (not (member cmd api:read-only-queries)))
(db-file-path (db:dbfile-path)) ;; 0))
- (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
+ (dbstructs-local (db:setup #t))
(read-only (not (file-write-access? db-file-path)))
(start (current-milliseconds))
(resdat (if (not (and read-only qry-is-write))
(let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
@@ -432,18 +393,13 @@
(mutex-lock! *db-multi-sync-mutex*)
/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
(mutex-unlock! *db-multi-sync-mutex*)))))
res))
-(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
+(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
(let* ((run-id (if run-id run-id 0))
- (res ;; (handle-exceptions
- ;; exn
- ;; (begin
- ;; (print "transport failed. exn=" exn)
- ;; #f)
- (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; )
+ (res (http-transport:client-api-send-receive run-id runremote cmd params)))
(if (and res (vector-ref res 0))
(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
#f)))
;;======================================================================
@@ -470,15 +426,12 @@
(rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))
;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
-(define (rmt:login-no-auto-client-setup connection-info)
- (case *transport-type* ;; run-id of 0 is just a placeholder
- ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature))))
- ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
- ))
+(define (rmt:login-no-auto-client-setup runremote)
+ (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible
;;
(define (rmt:general-call stmtname run-id . params)
@@ -519,13 +472,14 @@
;;======================================================================
;; K E Y S
;;======================================================================
;; These require run-id because the values come from the run!
+;; however the query must still apply to main.db
;;
(define (rmt:get-key-val-pairs run-id)
- (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
+ (rmt:send-receive 'get-key-val-pairs #f (list run-id)))
(define (rmt:get-keys)
(if *db-keys* *db-keys*
(let ((res (rmt:send-receive 'get-keys #f '())))
(set! *db-keys* res)
@@ -548,11 +502,11 @@
(define (rmt:get-targets)
(rmt:send-receive 'get-targets #f '()))
(define (rmt:get-target run-id)
(assert (number? run-id) "FATAL: Run id required.")
- (rmt:send-receive 'get-target run-id (list run-id)))
+ (rmt:send-receive 'get-target #f (list run-id)))
(define (rmt:get-run-times runpatt targetpatt)
(rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
@@ -567,20 +521,21 @@
(define (rmt:get-test-id run-id testname item-path)
(assert (number? run-id) "FATAL: Run id required.")
(rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
-;; run-id is NOT used
-;;
(define (rmt:get-test-info-by-id run-id test-id)
(if (number? test-id)
(rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
(begin
(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
(print-call-chain (current-error-port))
#f)))
+(define (rmt:get-test-state-status-by-id run-id test-id)
+ (rmt:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))
+
(define (rmt:test-get-rundir-from-test-id run-id test-id)
(rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
(assert (number? run-id) "FATAL: Run id required.")
@@ -829,10 +784,13 @@
(define (rmt:get-run-state run-id)
(assert (number? run-id) "FATAL: Run id required.")
(rmt:send-receive 'get-run-state #f (list run-id)))
+(define (rmt:get-run-state-status run-id)
+ (assert (number? run-id) "FATAL: Run id required.")
+ (rmt:send-receive 'get-run-state-status #f (list run-id)))
(define (rmt:set-run-status run-id run-status #!key (msg #f))
(assert (number? run-id) "FATAL: Run id required.")
(rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
@@ -1044,23 +1002,36 @@
(rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
(define (rmt:test-get-archive-block-info archive-block-id)
(rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
-
(define (rmtmod:calc-ro-mode runremote *toppath*)
- (if (and runremote
- (remote-ro-mode-checked runremote))
- (remote-ro-mode runremote)
- (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
- (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
- (if runremote
- (begin
- (remote-ro-mode-set! runremote ro-mode)
- (remote-ro-mode-checked-set! runremote #t)
- ro-mode)
- ro-mode))))
+ (case (rmt:transport-mode)
+ ((http)
+ (if (and runremote
+ (remote-ro-mode-checked runremote))
+ (remote-ro-mode runremote)
+ (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
+ (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+ (if runremote
+ (begin
+ (remote-ro-mode-set! runremote ro-mode)
+ (remote-ro-mode-checked-set! runremote #t)
+ ro-mode)
+ ro-mode))))
+ ((tcp)
+ (if (and runremote
+ (tt-ro-mode-checked runremote))
+ (tt-ro-mode runremote)
+ (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
+ (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+ (if runremote
+ (begin
+ (tt-ro-mode-set! runremote ro-mode)
+ (tt-ro-mode-checked-set! runremote #t)
+ ro-mode)
+ ro-mode))))))
(define (extras-readonly-mode rmt-mutex log-port cmd params)
(mutex-unlock! rmt-mutex)
(debug:print-info 12 log-port "rmt:send-receive, case 3")
(debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
@@ -1067,13 +1038,12 @@
#f)
(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
(debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
(mutex-lock! *rmt-mutex*)
- (remote-conndat-set! runremote #f)
- (http-transport:close-connections area-dat: runremote)
- (remote-server-url-set! runremote #f)
+ (http-transport:close-connections runremote)
+ ;; (remote-server-url-set! runremote #f)
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
@@ -1096,11 +1066,11 @@
;; want to ease off
;; the queries
(let ((wait-delay (+ attemptnum (* attemptnum 10))))
(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
(mutex-lock! *rmt-mutex*)
- (http-transport:close-connections area-dat: runremote)
+ (http-transport:close-connections runremote)
(set! *runremote* #f) ;; force starting over
(mutex-unlock! *rmt-mutex*)
(thread-sleep! wait-delay)
(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
res)) ;; All good, return res
Index: runconfig.scm
==================================================================
--- runconfig.scm
+++ runconfig.scm
@@ -22,10 +22,12 @@
(use format directory-utils)
(declare (unit runconfig))
(declare (uses common))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(define (runconfig:read fname target environ-patt)
(let ((ht (make-hash-table)))
Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -22,10 +22,11 @@
sxml-modifications matchable)
(declare (unit runs))
(declare (uses db))
(declare (uses common))
+(declare (uses commonmod))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
@@ -37,10 +38,12 @@
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
;; (include "debugger.scm")
+
+(import commonmod)
;; use this struct to facilitate refactoring
;;
(defstruct runs:dat
@@ -1279,15 +1282,23 @@
(list hed tal reg reruns))
;; If no resources are available just kill time and loop again
;;
((not have-resources) ;; simply try again after waiting a second
- (if (runs:lownoise "no resources" 60)
+ (if (runs:lownoise "no resources" 600)
(debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
+
;; Have gone back and forth on this but db starvation is an issue.
;; wait one second before looking again to run jobs.
- (thread-sleep! 0.25)
+ ;; (thread-sleep! 0.25)
+
+ ;; new logic.
+ ;; If it has been more than 10 seconds since we were last here don't wait at all
+ ;; otherwise sleep 2 seconds to give db a rest and let dashboard read data
+ (if (runs:lownoise "frequent-no-resources" 10)
+ (thread-sleep! 0.25) ;; no significant delay
+ (thread-sleep! 2))
;; could have done hed tal here but doing car/cdr of newtal to rotate tests
(list (car newtal)(cdr newtal) reg reruns))
;; This is the final stage, everything is in place so launch the test
;;
@@ -1775,11 +1786,11 @@
(last-jobs-check-time (runs:dat-last-jobs-check-time runsdat))
(should-check-jobs (match can-run-more-tests
((can-run-more-flag num-running nr-in-jobgroup max-concurrent-jobs . params)
(if (< (- max-concurrent-jobs num-running) 25)
(begin
- (debug:print-info 0 *default-log-port*
+ (debug:print-info 2 *default-log-port*
"less than 20 jobs headroom, ("max-concurrent-jobs
"-"num-running")>20. Forcing prelaunch check.")
#t)
#f))
(else #f)))) ;; no record yet
@@ -1855,11 +1866,12 @@
(newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path
(tests:testqueue-set-items! new-test-record #f)
(tests:testqueue-set-itemdat! new-test-record my-itemdat)
(tests:testqueue-set-item_path! new-test-record my-item-path)
(hash-table-set! test-records newtestname new-test-record)
- (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath
+ ;; BUG: This next line sucks up a lot of horsepower
+ (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath
items-in-testpatt)))
;; At this point we have possibly added items to tal but all must be handed off to
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -65,21 +65,10 @@
;;======================================================================
;; Call this to start the actual server
;;
-;; all routes though here end in exit ...
-;;
-;; start_server
-;;
-(define (server:launch run-id transport-type)
- (case transport-type
- ((http)(http-transport:launch))
- ;;((nmsg)(nmsg-transport:launch run-id))
- ;;((rpc) (rpc-transport:launch run-id))
- (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
-
;;======================================================================
;; S E R V E R U T I L I T I E S
;;======================================================================
;; Get the transport
@@ -112,39 +101,32 @@
(if *server-id* *server-id*
(let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
(set! *server-id* sig)
*server-id*)))
-;; When using zmq this would send the message back (two step process)
-;; with spiffy or rpc this simply returns the return data to be returned
-;;
-(define (server:reply return-addr query-sig success/fail result)
- (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
- ;; (send-message pubsock target send-more: #t)
- ;; (send-message pubsock
- (case (server:get-transport)
- ((rpc) (db:obj->string (vector success/fail query-sig result)))
- ((http) (db:obj->string (vector success/fail query-sig result)))
- ((fs) result)
- (else
- (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
- result)))
+;; ;; When using zmq this would send the message back (two step process)
+;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;;
+;; (define (server:reply return-addr query-sig success/fail result)
+;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;; ;; (send-message pubsock target send-more: #t)
+;; ;; (send-message pubsock
+;; (case (server:get-transport)
+;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
+;; ((http) (db:obj->string (vector success/fail query-sig result)))
+;; ((fs) result)
+;; (else
+;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;; result)))
;; Given an area path, start a server process ### NOTE ### > file 2>&1
;; if the target-host is set
;; try running on that host
;; incidental: rotate logs in logs/ dir.
;;
(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
- (let* (;; (curr-host (get-host-name))
- ;; (attempt-in-progress (server:start-attempted? areapath))
- ;; (dot-server-url (server:check-if-running areapath))
- ;; (curr-ip (server:get-best-guess-address curr-host))
- ;; (curr-pid (current-process-id))
- ;; (homehost (server:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
- ;; (target-host (car homehost))
- (testsuite (common:get-testsuite-name))
+ (let* ((testsuite (common:get-testsuite-name))
(logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
(profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
""))
(cmdln (conc (common:get-megatest-exe)
" -server - ";; (or target-host "-")
@@ -190,46 +172,48 @@
(let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
(dbprep-rx (regexp "^SERVER: dbprep"))
(dbprep-found 0)
(bad-dat (list #f #f #f #f #f)))
(handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn)
- bad-dat) ;; no idea what went wrong, call it a bad server
- (with-input-from-file
- logf
- (lambda ()
- (let loop ((inl (read-line))
- (lnum 0))
- (if (not (eof-object? inl))
- (let ((mlst (string-match server-rx inl))
- (dbprep (string-match dbprep-rx inl)))
- (if dbprep (set! dbprep-found 1))
- (if (not mlst)
- (if (< lnum 500) ;; give up if more than 500 lines of server log read
- (loop (read-line)(+ lnum 1))
- (begin
+ exn
+ (begin
+ ;; WARNING: this is potentially dangerous to blanket ignore the errors
+ (if (file-exists? logf)
+ (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
+ bad-dat) ;; no idea what went wrong, call it a bad server
+ (with-input-from-file
+ logf
+ (lambda ()
+ (let loop ((inl (read-line))
+ (lnum 0))
+ (if (not (eof-object? inl))
+ (let ((mlst (string-match server-rx inl))
+ (dbprep (string-match dbprep-rx inl)))
+ (if dbprep (set! dbprep-found 1))
+ (if (not mlst)
+ (if (< lnum 500) ;; give up if more than 500 lines of server log read
+ (loop (read-line)(+ lnum 1))
+ (begin
(debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
bad-dat))
- (match mlst
- ((_ host port start server-id pid)
- (list host
- (string->number port)
- (string->number start)
- server-id
- (string->number pid)))
- (else
- (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
- bad-dat))))
- (begin
- (if dbprep-found
- (begin
- (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
- (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
- (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
- bad-dat))))))))
+ (match mlst
+ ((_ host port start server-id pid)
+ (list host
+ (string->number port)
+ (string->number start)
+ server-id
+ (string->number pid)))
+ (else
+ (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
+ bad-dat))))
+ (begin
+ (if dbprep-found
+ (begin
+ (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
+ (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
+ (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
+ bad-dat))))))))
;; ;; get a list of servers from the log files, with all relevant data
;; ;; ( mod-time host port start-time pid )
;; ;;
;; (define (server:get-list areapath #!key (limit #f))
@@ -419,11 +403,12 @@
;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
(define (server:get-servers-info areapath)
- (let* ((servinfodir (conc *toppath*"/.servinfo")))
+ ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
+ (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
(if (not (file-exists? servinfodir))
(create-directory servinfodir))
(let* ((allfiles (glob (conc servinfodir"/*")))
(res (make-hash-table)))
(for-each
@@ -432,15 +417,45 @@
(serverdat (server:logf-get-start-info f)))
(match serverdat
((host port start server-id pid)
(if (and host port start server-id pid)
(hash-table-set! res hostport serverdat)
- (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat)))
+ (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
(else
- (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat)))))
+ (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
allfiles)
res)))
+
+;; check the .servinfo directory, are there other servers running on this
+;; or another host?
+;;
+;; returns #t => ok to start another server
+;; #f => not ok to start another server
+;;
+(define (server:minimal-check areapath)
+ (server:clean-up-old areapath)
+ (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
+ (servrs (glob (conc srvdir"/*")))
+ (thishostip (server:get-best-guess-address (get-host-name)))
+ (thisservrs (glob (conc srvdir"/"thishostip":*")))
+ (homehostinf (server:choose-server areapath 'homehost))
+ (havehome (car homehostinf))
+ (wearehome (cdr homehostinf)))
+ (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
+ ", numservers: "(length thisservrs))
+ (cond
+ ((not havehome) #t) ;; no homehost yet, go for it
+ ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
+ ((and havehome (not wearehome)) #f) ;; we are not the home host
+ ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
+ (else
+ (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
+ #t))))
+
+
+(define server-last-start 0)
+
;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
;; mode:
@@ -453,29 +468,47 @@
;; 1. sort by age ascending and ping until good
;; find alive rand from youngest
;; 1. sort by age descending
;; 2. take five
;; 3. check alive, discard if not and repeat
+ ;; first we clean up old server files
+ (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode))
+ (server:clean-up-old areapath)
+ (let* ((since-last (- (current-seconds) server-last-start))
+ (server-start-delay 10))
+ (if ( < (- (current-seconds) server-last-start) 10 )
+ (begin
+ (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+ (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
+ (thread-sleep! server-start-delay)
+ )
+ (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+ )
+ )
(let* ((serversdat (server:get-servers-info areapath))
(servkeys (hash-table-keys serversdat))
- (by-time-asc (if (not (null? servkeys))
+ (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
(sort servkeys ;; list of "host:port"
(lambda (a b)
(>= (list-ref (hash-table-ref serversdat a) 2)
(list-ref (hash-table-ref serversdat b) 2))))
'())))
+ (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
+ (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
(if (not (null? by-time-asc))
(let* ((oldest (last by-time-asc))
(oldest-dat (hash-table-ref serversdat oldest))
(host (list-ref oldest-dat 0))
(all-valid (filter (lambda (x)
(equal? host (list-ref (hash-table-ref serversdat x) 0)))
by-time-asc))
- (best-five (lambda ()
- (if (> (length all-valid) 5)
- (take all-valid 5)
- all-valid)))
+ (best-ten (lambda ()
+ (if (> (length all-valid) 11)
+ (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
+ (if (> (length all-valid) 8)
+ (drop-right all-valid 1)
+ all-valid))))
(names->dats (lambda (names)
(map (lambda (x)
(hash-table-ref serversdat x))
names)))
(am-home? (lambda ()
@@ -483,44 +516,81 @@
(bestadrs (server:get-best-guess-address currhost)))
(or (equal? host currhost)
(equal? host bestadrs))))))
(case mode
((info)
- (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
- (print "youngest: "(hash-table-ref serversdat (car all-valid))))
+ (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+ (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid))))
((home) host)
((homehost) (cons host (am-home?))) ;; shut up old code
((home?) (am-home?))
- ((best-five)(names->dats (best-five)))
+ ((best-ten)(names->dats (best-ten)))
((all-valid)(names->dats all-valid))
- ((best) (let* ((best-five (best-five))
- (len (length best-five)))
- (hash-table-ref serversdat (list-ref best-five (random len)))))
+ ((best) (let* ((best-ten (best-ten))
+ (len (length best-ten)))
+ (hash-table-ref serversdat (list-ref best-ten (random len)))))
((count)(length all-valid))
(else
(debug:print 0 *default-log-port* "ERROR: invalid command "mode)
#f)))
(begin
(server:run areapath)
- (thread-sleep! 3)
+ (set! server-last-start (current-seconds))
+ ;; (thread-sleep! 3)
(case mode
((homehost) (cons #f #f))
(else #f))))))
+
+(define (server:get-servinfo-dir areapath)
+ (let* ((spath (conc areapath"/.servinfo")))
+ (if (not (file-exists? spath))
+ (create-directory spath #t))
+ spath))
+
+(define (server:clean-up-old areapath)
+ ;; any server file that has not been touched in ten minutes is effectively dead
+ (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
+ (for-each
+ (lambda (sfile)
+ (let* ((modtime (handle-exceptions
+ exn
+ (begin
+ (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
+ (current-seconds))
+ (file-modification-time sfile))))
+ (if (and (number? modtime)
+ (> (- (current-seconds) modtime)
+ 600))
+ (begin
+ (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
+ (handle-exceptions
+ exn
+ (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
+ (delete-file sfile))))))
+ sfiles)))
;; would like to eventually get rid of this
;;
(define (common:on-homehost?)
- (server:choose-server *toppath* 'home?))
+ (if (eq? (rmt:transport-mode) 'http)
+ (server:choose-server *toppath* 'home?)
+ #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
;; look for $MT_RUN_AREA_HOME/logs/server-start-last
;; and wait for it to be at least seconds old
;; (server:wait-for-server-start-last-flag areapath)
- (if (< (server:choose-server areapath 'count) 10)
+ (let loop ()
+ (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
+ (begin
+ (if (common:low-noise-print 30 "our-host-load")
+ (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
+ (loop))))
+ (if (< (server:choose-server areapath 'count) 20)
(server:run areapath))
#;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
(let* ((lock-file (conc areapath "/logs/server-start.lock")))
(let* ((start-flag (conc areapath "/logs/server-start-last")))
(common:simple-file-lock-and-wait lock-file expire-time: 25)
@@ -538,11 +608,12 @@
(let loop ((server-info (server:check-if-running areapath))
(try-num 0))
(if (or server-info
(> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
(server:record->url server-info)
- (let ((num-ok (length (server:choose-server areapath 'all-valid))))
+ (let* ( (servers (server:choose-server areapath 'all-valid))
+ (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
(if (and (> try-num 0) ;; first time through simply wait a little while then try again
(< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
(server:run areapath))
(thread-sleep! 5)
(loop (server:check-if-running areapath)
@@ -555,11 +626,11 @@
;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;; #!key (numservers "2"))
(let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
- (servers (server:choose-server areapath 'best-five))) ;; (server:get-best (server:get-list areapath))))
+ (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
(if (or (and servers
(null? servers))
(not servers))
;; (and (list? servers)
;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
@@ -587,57 +658,53 @@
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
#f)
- (match-let (((mod-time hostname port start-time server-id pid)
+ (match-let (((hostname port start-time server-id pid)
servr))
(tasks:kill-server hostname pid))))
;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;; in the same process as the server.
;;
-(define (server:ping host-port-in server-id #!key (do-exit #f))
- (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
- #f ;; (server:check-if-running *toppath*)
- ;; (if (number? host-port-in) ;; we were handed a server-id
- ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
- ;; ;; (print "srec: " srec " host-port-in: " host-port-in)
- ;; (if srec
- ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4))
- ;; (conc "no such server-id " host-port-in)))
- host-port-in))) ;; )
- (let* ((host-port (if host:port
- (let ((slst (string-split host:port ":")))
- (if (eq? (length slst) 2)
- (list (car slst)(string->number (cadr slst)))
- #f))
- #f)))
-;; (toppath (launch:setup)))
- ;; (print "host-port=" host-port)
- (if (not host-port)
- (begin
- (if host-port-in
- (debug:print 0 *default-log-port* "ERROR: bad host:port"))
- (if do-exit (exit 1))
- #f)
- (let* ((iface (car host-port))
- (port (cadr host-port))
- (server-dat (http-transport:client-connect iface port server-id))
- (login-res (rmt:login-no-auto-client-setup server-dat)))
- (if (and (list? login-res)
- (car login-res))
- (begin
- ;; (print "LOGIN_OK")
- (if do-exit (exit 0))
- #t)
- (begin
- ;; (print "LOGIN_FAILED")
- (if do-exit (exit 1))
- #f)))))))
+(define (server:ping host:port server-id #!key (do-exit #f))
+ (let* ((host-port (cond
+ ((string? host:port)
+ (let ((slst (string-split host:port ":")))
+ (if (eq? (length slst) 2)
+ (list (car slst)(string->number (cadr slst)))
+ #f)))
+ (else
+ #f))))
+ (cond
+ ((and (list? host-port)
+ (eq? (length host-port) 2))
+ (let* ((myrunremote (make-and-init-remote *toppath*))
+ (iface (car host-port))
+ (port (cadr host-port))
+ (server-dat (client:connect iface port server-id myrunremote))
+ (login-res (rmt:login-no-auto-client-setup myrunremote)))
+ (http-transport:close-connections myrunremote)
+ (if (and (list? login-res)
+ (car login-res))
+ (begin
+ ;; (print "LOGIN_OK")
+ (if do-exit (exit 0))
+ #t)
+ (begin
+ ;; (print "LOGIN_FAILED")
+ (if do-exit (exit 1))
+ #f))))
+ (else
+ (if host:port
+ (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
+ (if do-exit
+ (exit 1)
+ #f)))))
;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
(with-input-from-pipe
@@ -668,11 +735,11 @@
(define (server:expiration-timeout)
(let ((tmo (configf:lookup *configdat* "server" "timeout")))
(if (and (string? tmo)
(common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
(* 3600 (string->number tmo))
- 60)))
+ 600)))
(define (server:get-best-guess-address hostname)
(let ((res #f))
(for-each
(lambda (adr)
Index: subrun.scm
==================================================================
--- subrun.scm
+++ subrun.scm
@@ -23,17 +23,20 @@
call-with-environment-variables)
(declare (unit subrun))
;;(declare (uses runs))
(declare (uses db))
(declare (uses common))
+(declare (uses commonmod))
;;(declare (uses items))
;;(declare (uses runconfig))
;;(declare (uses tests))
;;(declare (uses server))
(declare (uses mt))
;;(declare (uses archive))
;; (declare (uses filedb))
+
+(import commonmod)
;(include "common_records.scm")
;;(include "key_records.scm")
(include "db_records.scm") ;; provides db:test-get-id
;;(include "run_records.scm")
@@ -135,11 +138,11 @@
(subrun:unset-subrun-removed test-run-dir))
(let* ((log-prefix "run")
(switches (subrun:selector+log-switches test-run-dir log-prefix))
(run-wait (equal? run-mode "yes"))
- (cmd (conc "megatest " sub-cmd " " switches" "
+ (cmd (conc (common:get-mtexe)" "sub-cmd" "switches" "
(if run-wait "-run-wait " ""))))
cmd))
(define (subrun:sanitize-path inpath)
@@ -232,20 +235,24 @@
(list (car x) (cdr x)))
switch-alist))
" ")))
res))
+;; NOTE: Here we run sub megatest but this is not intended for one version
+;; of megatest to test another version. Thus we propagate the
(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)
- (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix))
- (cmd (conc "megatest " selector-switches " " action-switches-str ))
+ (let* ((mtpathdir (common:get-megatest-exe-dir))
+ (mtexe (common:get-mtexe))
+ (selector-switches (subrun:selector+log-switches test-run-dir log-prefix))
+ (cmd (conc mtexe" "selector-switches" "action-switches-str ))
(pid #f)
(proc (lambda ()
(debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd)
;;(set! pid (process-run "/usr/bin/xterm" (list ))))))
(set! pid (process-run "/bin/bash" (list "-c" cmd))))))
(call-with-environment-variables
- (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
+ (list (cons "PATH" (common:get-megatest-exe-path)))
(lambda ()
(common:without-vars proc "^MT_.*")))
(let processloop ((i 0))
(let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
(if (eq? pid-val 0)
Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -25,10 +25,12 @@
(declare (uses dbfile))
(declare (uses db))
(declare (uses rmt))
(declare (uses common))
(declare (uses pgdb))
+(declare (uses commonmod))
+(import commonmod)
(import dbfile)
;; (import pgdb) ;; pgdb is a module
(include "task_records.scm")
Index: tcmt.scm
==================================================================
--- tcmt.scm
+++ tcmt.scm
@@ -30,10 +30,12 @@
(declare (uses margs))
(declare (uses rmt))
(declare (uses common))
;; (declare (uses megatest-version))
+(declare (uses commonmod))
+(import commonmod)
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "db_records.scm")
ADDED tcp-transportmod.scm
Index: tcp-transportmod.scm
==================================================================
--- /dev/null
+++ tcp-transportmod.scm
@@ -0,0 +1,727 @@
+;;======================================================================
+;; Copyright 2017, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+
+(declare (unit tcp-transportmod))
+(declare (uses debugprint))
+(declare (uses commonmod))
+(declare (uses dbfile))
+(declare (uses dbmod))
+
+(use address-info)
+
+(module tcp-transportmod
+ *
+
+ (import scheme
+ (prefix sqlite3 sqlite3:)
+ chicken
+ data-structures
+
+ address-info
+ directory-utils
+ extras
+ files
+ hostinfo
+ matchable
+ md5
+ message-digest
+ ports
+ posix
+ regex
+ regex-case
+ s11n
+ srfi-1
+ srfi-18
+ srfi-4
+ srfi-69
+ stack
+ typed-records
+ tcp-server
+ tcp
+
+ debugprint
+ commonmod
+ dbfile
+ dbmod
+ )
+
+;;======================================================================
+;; client
+;;======================================================================
+
+;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
+
+;; Used ONLY for client
+;;
+(defstruct tt-conn
+ host
+ port
+ host-port
+ dbfname
+ server-id
+ server-start
+ pid
+)
+
+;; Used for BOTH clients and servers
+(defstruct tt
+ ;; client related
+ (conns (make-hash-table)) ;; dbfname -> conn
+
+ ;; server related
+ (state 'starting)
+ (areapath #f)
+ (host #f)
+ (port #f)
+ (conn #f)
+ (cleanup-proc #f)
+ (handler #f) ;; receives data and responds
+ (socket #f)
+ (thread #f)
+ (host-port #f)
+ (cmd-thread #f)
+ (ro-mode #f)
+ (ro-mode-checked #f)
+ (last-access (current-seconds))
+ (servinf-file #f)
+ (last-serv-start 0)
+ )
+
+;; make ttdat visible
+(define *server-info* #f)
+
+(define (tt:make-remote areapath)
+ (make-tt areapath: areapath))
+
+;; 1 ... or #f
+(define (tt:valid-run-id run-id)
+ (or (number? run-id)
+ (not run-id)))
+
+;; do all the busy work of finding and setting up conn for
+;; connecting to a server
+;;
+(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
+ (assert (tt:valid-run-id run-id) "FATAL: invalid run-id "run-id)
+ (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
+ (server-start-proc (lambda ()
+ (tt:server-process-run
+ (tt-areapath ttdat)
+ testsuite ;; (dbfile:testsuite-name)
+ (common:find-local-megatest)
+ run-id))))
+ (if conn
+ conn ;; we are already connected to the server
+ (let* ((sdat (tt:get-current-server-info ttdat dbfname)))
+ (match sdat
+ ((host port start-time server-id pid dbfname2 servinffile)
+ (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
+ (let* ((host-port (conc host":"port))
+ (conn (make-tt-conn
+ host: host
+ port: port
+ host-port: host-port
+ dbfname: dbfname
+ servinf-file: servinffile
+ server-id: server-id
+ server-start: start-time
+ pid: pid)))
+ (hash-table-set! (tt-conns ttdat) dbfname conn)
+ ;; verify we can talk to this server
+ (let* ((ping-res (tt:ping host port server-id)))
+ (case ping-res
+ ((running) conn)
+ ((starting)
+ (thread-sleep! 0.5)
+ (tt:client-connect-to-server ttdat dbfname run-id testsuite))
+ (else
+ (let* ((curr-secs (current-seconds)))
+ ;; rm the (last server) would go here
+ (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
+ (begin
+ (tt-last-serv-start-set! ttdat curr-secs)
+ (server-start-proc))) ;; start server if 30 sec since last attempt
+ (thread-sleep! 1)
+ (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
+
+ (else
+ (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers
+ (begin
+ (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
+ (server-start-proc)
+ (tt-last-serv-start-set! ttdat (current-seconds))))
+ (thread-sleep! 1)
+ (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
+
+(define (tt:ping host port server-id)
+ (let* ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id
+ ;;
+ ;; need two threads, one a 5 second timer
+ ;;
+ (match res
+ ((status errmsg result meta)
+ (if (equal? result server-id)
+ (let* ((server-state (alist-ref 'sstate meta)))
+ ;; (debug:print 0 *default-log-port* "Ping to "host":"port" successful.")
+ (or server-state 'unk)) ;; then we are good
+ (begin
+ (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result)
+ #f)))
+ (else
+ ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res)
+ #f))))
+
+;; client side handler
+;;
+;;(tt:handler # get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
+;;
+(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
+ ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
+ (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
+ (if conn
+ ;; have connection, call the server
+ (let* ((res (tt:send-receive ttdat conn cmd run-id params)))
+ ;; res is (status errmsg result meta)
+ (match res
+ ((status errmsg result meta)
+ (if (list? meta)
+ (let* ((delay-wait (alist-ref 'delay-wait meta)))
+ (if (and (number? delay-wait)
+ (> delay-wait 0))
+ (begin
+ (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds")
+ (thread-sleep! delay-wait)))))
+ (case status
+ ((busy) ;; result will be how long the server wants you to delay
+ (debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in "result" seconds.")
+ (thread-sleep! (if (number? result) result 2))
+ (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
+ ((loaded)
+ (debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a 1/4 second.")
+ (thread-sleep! 0.25)
+ (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
+ (else
+ result)))
+ (else
+ (if (not res)
+ (let* ((host (tt-conn-host conn))
+ (port (tt-conn-port conn))
+ ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
+ (pid (tt-conn-pid conn))
+ (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname)))
+ (hash-table-set! (tt-conns ttdat) dbfname #f)
+ (if (file-exists? servinf)
+ (begin
+ (debug:print 0 *default-log-port* "INFO: no ping response from server "host":"port" for "dbfname)
+ (if (and (file-exists? servinf)
+ (> (- (current-seconds)(file-modification-time servinf)) 60))
+ (begin
+ (debug:print 0 *default-log-port* "INFO: "servinf" file seems old and no ping response, removing it.")
+ (delete-file* servinf))))
+ (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf))
+ (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
+ (assert #f "FATAL: tt:handler received bad data "res)))))
+ (begin
+ (thread-sleep! 1) ;; give it a rest and try again
+ (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))))
+
+ ;; no conn yet, find and or start and find a server
+;; (let* ((server (tt:find-server ttdat dbfname)))
+;; (if server
+;; (let* ((conn (tt:client-connect-to-server server)))
+;; (hash-table-set! (tt-conns ttdat) dbfname conn)
+;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode
+;; dbfname testsuite mtexe))
+;; ;; no server, try to start a server process
+;; (begin
+;; (tt:server-process-run areapath testsuite mtexe run-id) ;; #!key (profile-mode ""))
+;; (thread-sleep! 1)
+;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath
+;; readonly-mode dbfname testsuite mtexe)))))))
+
+(define (tt:bid-for-servership run-id)
+ #f)
+
+;; gets server info and appends path to server file
+;; sorts by age, oldest first
+;;
+;; returns list of (host port startseconds server-id servinfofile)
+;;
+(define (tt:get-server-info-sorted ttdat dbfname)
+ (let* ((areapath (tt-areapath ttdat))
+ (sfiles (tt:find-server areapath dbfname))
+ (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
+ (sorted (sort sdats (lambda (a b)
+ (< (list-ref a 2)(list-ref b 2))))))
+ sorted))
+
+(define (tt:get-current-server-info ttdat dbfname)
+ (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
+ ;;
+ ;; TODO - replace most of below with tt;get-server-info-sorted
+ ;;
+ (let* ((areapath (tt-areapath ttdat))
+ (sfiles (tt:find-server areapath dbfname))
+ (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
+ (sorted (sort sdats (lambda (a b)
+ (< (list-ref a 2)(list-ref b 2))))))
+ (if (null? sorted)
+ #f ;; we'll want to wait until extra servers have exited
+ (car sorted))))
+
+(define (tt:send-receive ttdat conn cmd run-id params)
+ (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
+ (host (tt-conn-host conn))
+ (port (tt-conn-port conn))
+ (dat (list cmd run-id params #f))) ;; no meta data yet
+ (tt:send-receive-direct host port dat)))
+
+(define (tt:send-receive-direct host port dat)
+ (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
+ (handle-exceptions
+ exn
+ #f ;; Add condition-case or better handling here
+ (let-values (((inp oup)(tcp-connect host port)))
+ (let ((res (if (and inp oup)
+ (begin
+ (serialize dat oup)
+ (close-output-port oup)
+ (deserialize inp))
+ )))
+ (close-input-port inp)
+ res))))
+
+
+
+;;======================================================================
+;; server
+;;======================================================================
+
+(define (tt:sync-dbs ttdat)
+ #f)
+
+;; start the listener and start responding to requests
+;;
+;; NOTE: organise by dbfname, not run-id so we don't need
+;; to pull in more modules
+;;
+;; This is the routine called in megatest.scm to start a server
+;;
+;; Server viability is checked in keep-running. Blindly start and run here.
+;;
+(define (tt:start-server areapath run-id dbfname-in handler keys)
+ (assert areapath "FATAL: areapath not provided for tt:start-server")
+ ;; is there already a server for this dbfile? Then exit.
+ (let* ((ttdat (make-tt areapath: areapath))
+ (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))))
+ ;; (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
+ ;; (if (null? servers)
+ (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys)))
+ (tt-handler-set! ttdat (handler dbstruct))
+ (let* ((tcp-thread (make-thread
+ (lambda ()
+ (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data
+ "tcp-server-thread"))
+ (run-thread (make-thread
+ (lambda ()
+ (tt:keep-running ttdat dbfname dbstruct)))))
+ (thread-start! tcp-thread)
+ (thread-start! run-thread)
+ (thread-join! run-thread) ;; run thread will exit on timeout or other conditions
+ (exit)))
+ ;;(begin
+ ;; (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
+ ;; (exit)))))
+ ))
+
+(define (tt:keep-running ttdat dbfname dbstruct)
+ ;; verfiy conn for ready
+ ;; listener socket has been started by this stage
+ ;; wait for a port before creating the registration file
+ ;;
+ (let* ((db-locked-in #f)
+ (areapath (tt-areapath ttdat))
+ (nosyncdbpath (conc areapath"/.megatest"))
+ (cleanup (lambda ()
+ (if (tt-cleanup-proc ttdat)
+ ((tt-cleanup-proc ttdat)))
+ (dbfile:with-no-sync-db nosyncdbpath
+ (lambda (db)
+ (db:no-sync-del! db dbfname))))))
+ (set! *server-info* ttdat)
+ (let loop ((count 0))
+ (if (> count 240)
+ (begin
+ (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
+ (exit 1))
+ (if (not (tt-port ttdat)) ;; no connection yet
+ (begin
+ (thread-sleep! 0.25)
+ (loop (+ count 1))))))
+
+ ;; load or reload the data into inmem db before
+ ;; ((dbr:dbstruct-sync-proc dbstruct) (dbr:dbstruct-last-update dbstruct))
+ ;; (dbr:dbstruct-last-update-set! dbstruct (- (current-seconds) 1))
+ (tt:create-server-registration-file ttdat dbfname)
+ ;; now start watching the last-access, if it hasn't been touched
+ ;; in over ten seconds we exit
+ (thread-sleep! 0.05) ;; any real need for delay here?
+ (let loop ()
+ (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
+ (ok (cond
+ ((null? servers) #f) ;; not ok
+ ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
+ (tt-servinf-file ttdat))
+ (debug:print-info 0 *default-log-port* "Keep running, I'm the top server on "(tt-host ttdat)":"(tt-port ttdat))
+ (if db-locked-in
+ #t
+ (let* ((lockinfo (dbfile:with-no-sync-db nosyncdbpath
+ (lambda (db)
+ (db:no-sync-get-lock db dbfname))))
+ (success (car lockinfo)))
+ (if success
+ (begin
+ (tt-state-set! ttdat 'running)
+ (debug:print 0 *default-log-port* "Got server lock for "dbfname)
+ (set! db-locked-in #t)
+ #t)
+ (begin
+ (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
+ #f)))))
+ (else
+ (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
+ (let* ((leadsrv (car servers)))
+ (match leadsrv
+ ((host port startseconds server-id pid dbfname servinfofile)
+ (if (tt:ping host port server-id)
+ #f ;; not the server, but all good, want to exit
+ (if (and (file-exists? servinfofile)
+ (> (- (current-seconds)(file-modification-time servinfofile)) 15))
+ (begin
+ ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it
+ (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
+ (delete-file* servinfofile)
+ #t) ;; not the server but the server is not reachable
+ (begin
+ (debug:print 0 *default-log-port* "I'm not the server but will try again since "servinfofile" is fresh")
+ #t))))
+ (else ;; should never get here
+ (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
+ (assert #f "Bad server record "leadsrv))))))))
+ (if ok
+ ;; (if (> *api-process-request-count* 0) ;; have requests in flight
+ ;; (tt-last-access-set! ttdat (current-seconds)))
+ (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
+ (begin
+ (cleanup)
+ (exit)))
+
+ (let* ((last-update (dbr:dbstruct-last-update dbstruct))
+ (curr-secs (current-seconds)))
+ (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
+ (begin
+ ((dbr:dbstruct-sync-proc dbstruct) last-update)
+ (dbr:dbstruct-last-update-set! dbstruct curr-secs))))
+
+ (if (< (- (current-seconds) (tt-last-access ttdat)) 60)
+ (begin
+ (thread-sleep! 5)
+ (loop)))))
+ (cleanup)
+ (debug:print 0 *default-log-port* "INFO: Server timed out, exiting.")))
+
+
+;; ;; given an already set up uconn start the cmd-loop
+;; ;;
+;; (define (tt:cmd-loop ttdat)
+;; (let* ((serv-listener (-socket uconn))
+;; (listener (lambda ()
+;; (let loop ((state 'start))
+;; (let-values (((inp oup)(tcp-accept serv-listener)))
+;; ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params)
+;; (resp (ulex-handler uconn rdat)))
+;; (serialize resp oup)
+;; (close-input-port inp)
+;; (close-output-port oup)
+;; ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; )
+;; (loop state))))))
+;; ;; start N of them
+;; (let loop ((thnum 0)
+;; (threads '()))
+;; (if (< thnum 100)
+;; (let* ((th (make-thread listener (conc "listener" thnum))))
+;; (thread-start! th)
+;; (loop (+ thnum 1)
+;; (cons th threads)))
+;; (map thread-join! threads)))))
+;;
+;;
+;;
+;; (define (wait-and-close uconn)
+;; (thread-join! (udat-cmd-thread uconn))
+;; (tcp-close (udat-socket uconn)))
+;;
+;;
+
+(define (tt:shutdown-server ttdat)
+ (let* ((cleanproc (tt-cleanup-proc ttdat)))
+ (tt-state-set! ttdat 'shutdown)
+ (if cleanproc (cleanproc))
+ (tcp-close (tt-socket ttdat)) ;; close up ports here
+ ))
+
+;; (define (wait-and-close uconn)
+;; (thread-join! (tt-cmd-thread uconn))
+;; (tcp-close (tt-socket uconn)))
+
+;; return servid
+;; side-effects:
+;; ttdat-cleanup-proc is populated with function to remove the serverinfo file
+(define (tt:create-server-registration-file ttdat dbfname)
+ (let* ((areapath (tt-areapath ttdat))
+ (servdir (tt:get-servinfo-dir areapath))
+ (host (tt-host ttdat))
+ (port (tt-port ttdat))
+ (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
+ (serv-id (tt:mk-signature areapath))
+ (clean-proc (lambda ()
+ (delete-file* servinf))))
+ (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
+ (tt-cleanup-proc-set! ttdat clean-proc)
+ (tt-servinf-file-set! ttdat servinf)
+ (with-output-to-file servinf
+ (lambda ()
+ (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
+ serv-id))
+
+;; find valid server
+;; get servers listed, last part of name must match :
+;; if more than one, wait one second and look again
+;; future: ping oldest, if alive remove other : files
+;;
+(define (tt:find-server areapath dbfname)
+ (let* ((servdir (tt:get-servinfo-dir areapath))
+ (sfiles (glob (conc servdir"/*:"dbfname))))
+ sfiles))
+
+;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
+;; example of what it's looking for in the log file:
+;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
+;;
+(define (tt:server-get-info logf)
+ (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
+ (dbprep-rx (regexp "^SERVER: dbprep"))
+ (dbprep-found 0)
+ (bad-dat (list #f #f #f #f #f #f logf)))
+ (let ((fdat (handle-exceptions
+ exn
+ (begin
+ ;; WARNING: this is potentially dangerous to blanket ignore the errors
+ (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn=" exn)
+ '()) ;; no idea what went wrong, call it a bad server, return empty list
+ (with-input-from-file logf read-lines))))
+ (if (null? fdat) ;; bad data, return bad-dat
+ bad-dat
+ (let loop ((inl (car fdat))
+ (tail (cdr fdat))
+ (lnum 0))
+ (let ((mlst (string-match server-rx inl))
+ (dbprep (string-match dbprep-rx inl)))
+ (if dbprep (set! dbprep-found 1))
+ (if (not mlst)
+ (if (> lnum 500) ;; give up if more than 500 lines of server log read
+ bad-dat
+ (if (null? tail)
+ bad-dat
+ (loop (car tail)(cdr tail)(+ lnum 1))))
+ (match mlst ;; have a not null list
+ ((_ host port start server-id pid dbfname)
+ (list host
+ (string->number port)
+ (string->number start)
+ server-id
+ (string->number pid)
+ dbfname
+ logf))
+ (else
+ (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
+ bad-dat)))))))))
+
+;; Given an area path, start a server process ### NOTE ### > file 2>&1
+;; if the target-host is set
+;; try running on that host
+;; incidental: rotate logs in logs/ dir.
+;;
+(define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area
+ (assert areapath "FATAL: tt:server-process-run called without areapath defined.")
+ (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.")
+ (assert mtexe "FATAL: tt:server-process-run called without mtexe defined.")
+ ;; mtest -server - -m testsuite:ext-tests -db 6.db
+ (let* ((dbfname (dbmod:run-id->dbfname run-id))
+ (load (get-normalized-cpu-load))
+ (nrun (number-of-processes-running (conc "mtest.*server.*"testsuite".*"dbfname))))
+ (cond
+ ((> load 2.0)
+ (debug:print 0 *default-log-port* "Normalized load "load" is over the limit of 2.0. Not starting a server.")
+ (thread-sleep! 1))
+ ((> nrun 100)
+ (debug:print 0 *default-log-port* nrun" servers running on this host, not starting another.")
+ (thread-sleep! 1))
+ (else
+ (if (not (file-exists? (conc areapath"/logs")))
+ (create-directory (conc areapath"/logs") #t))
+ (let* ((logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+ (cmdln (conc
+ mtexe
+ " -server - ";; (or target-host "-")
+ " -m testsuite:" testsuite
+ ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this
+ " -db " dbfname ;; (dbmod:run-id->dbfname run-id)
+ " " profile-mode
+ ))) ;; (conc " >> " logfile " 2>&1 &")))))
+ ;; we want the remote server to start in *toppath* so push there
+ ;; (push-directory areapath) ;; use cd in the command line instead
+ (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
+ ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
+ (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
+ (setenv "NBFAKE_LOG" logfile)
+ (system (conc "cd "areapath" ; nbfake " cmdln))
+ (unsetenv "NBFAKE_QUIET")
+ (unsetenv "NBFAKE_LOG")
+ ;;(pop-directory)
+ )))))
+
+;;======================================================================
+;; tcp connection stuff
+;;======================================================================
+
+;; find a port and start tcp-server. This only starts the tcp portion of
+;; the server, look at (tt:start-server ...) above for the entry point
+;; for the entire server system
+;;
+(define (tt:start-tcp-server ttdat)
+ (setup-listener ttdat)
+ (let* ((socket (tt-socket ttdat))
+ (handler (tt-handler ttdat)))
+ ((make-tcp-server socket handler)
+ #f ;; yes, send error messages to std-err
+ )))
+
+;; create a tcp listener and return a populated udat struct with
+;; my port, address, hostname, pid etc.
+;; return #f if fail to find a port to allocate.
+;;
+;; if udata-in is #f create the record
+;; if there is already a serv-listener return the udata
+;;
+(define (setup-listener uconn #!optional (port 4242))
+ (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn)
+ (handle-exceptions
+ exn
+ (if (< port 65535)
+ (begin
+ (thread-sleep! 0.25)
+ (setup-listener uconn (+ port 1)))
+ #f)
+ (connect-listener uconn port)))
+
+(define (connect-listener uconn port)
+ ;; (tcp-listener-socket LISTENER)(socket-name so)
+ ;; sockaddr-address, sockaddr-port, sockaddr->string
+ (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
+ (addr (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
+ (tt-port-set! uconn port)
+ (tt-host-set! uconn addr)
+ (tt-host-port-set! uconn (conc addr":"port))
+ (tt-socket-set! uconn tlsn)
+ uconn))
+
+;;======================================================================
+;; utils
+;;======================================================================
+
+;; Generate a unique signature for this server
+(define (tt:mk-signature areapath)
+ (message-digest-string (md5-primitive)
+ (with-output-to-string
+ (lambda ()
+ (write (list areapath
+ (current-process-id)
+ (argv)))))))
+
+
+(define (tt:get-best-guess-address hostname)
+ (let ((res #f))
+ (for-each
+ (lambda (adr)
+ (if (not (eq? (u8vector-ref adr 0) 127))
+ (set! res adr)))
+ ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+ (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+ (string-intersperse
+ (map number->string
+ (u8vector->list
+ (if res res (hostname->ip hostname)))) ".")))
+
+(define (tt:get-servinfo-dir areapath)
+ (let* ((spath (conc areapath"/.servinfo")))
+ (if (not (file-exists? spath))
+ (create-directory spath #t))
+ spath))
+
+;;======================================================================
+;; network utilities
+;;======================================================================
+
+;; NOTE: Look at address-info egg as alternative to some of this
+
+(define (rate-ip ipaddr)
+ (regex-case ipaddr
+ ( "^127\\..*" _ 0 )
+ ( "^(10\\.0|192\\.168)\\..*" _ 1 )
+ ( else 2 ) ))
+
+;; Change this to bias for addresses with a reasonable broadcast value?
+;;
+(define (ip-pref-less? a b)
+ (> (rate-ip a) (rate-ip b)))
+
+(define (get-my-best-address)
+ (let ((all-my-addresses (get-all-ips)))
+ (cond
+ ((null? all-my-addresses)
+ (get-host-name)) ;; no interfaces?
+ ((eq? (length all-my-addresses) 1)
+ (car all-my-addresses)) ;; only one to choose from, just go with it
+ (else
+ (car (sort all-my-addresses ip-pref-less?))))))
+
+(define (get-all-ips-sorted)
+ (sort (get-all-ips) ip-pref-less?))
+
+(define (get-all-ips)
+ (map address-info-host
+ (filter (lambda (x)
+ (equal? (address-info-type x) "tcp"))
+ (address-infos (get-host-name)))))
+
+)
Index: tdb.scm
==================================================================
--- tdb.scm
+++ tdb.scm
@@ -32,10 +32,12 @@
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))
(declare (uses db))
+(declare (uses commonmod))
+(import commonmod)
(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")
Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -1966,13 +1966,13 @@
;;======================================================================
;; teststep-set-status! used to be here
(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)
- (let* ((testdat (rmt:get-test-info-by-id run-id test-id)))
+ (let* ((testdat (rmt:get-test-state-status-by-id run-id test-id)))
(and testdat
- (equal? (test:get-state testdat) "KILLREQ"))))
+ (equal? (car testdat) "KILLREQ"))))
(define (test:tdb-get-rundat-count tdb)
(if tdb
(let ((res 0))
(sqlite3:for-each-row
ADDED transport-mode.scm.template
Index: transport-mode.scm.template
==================================================================
--- /dev/null
+++ transport-mode.scm.template
@@ -0,0 +1,3 @@
+;; 'http or 'tcp
+(rmt:transport-mode 'tcp)
+;; (rmt:transport-mode 'http)
ADDED ulex/dbmgr.scm
Index: ulex/dbmgr.scm
==================================================================
--- /dev/null
+++ ulex/dbmgr.scm
@@ -0,0 +1,1131 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+
+(declare (unit dbmgrmod))
+
+(declare (uses ulex))
+(declare (uses apimod))
+(declare (uses pkts))
+(declare (uses commonmod))
+(declare (uses dbmod))
+(declare (uses mtargs))
+(declare (uses portloggermod))
+(declare (uses debugprint))
+
+(module dbmgrmod
+ *
+
+(import scheme
+ chicken.base
+ chicken.condition
+ chicken.file
+ chicken.format
+ chicken.port
+ chicken.process
+ chicken.process-context
+ chicken.process-context.posix
+ chicken.sort
+ chicken.string
+ chicken.time
+
+ (prefix sqlite3 sqlite3:)
+ matchable
+ md5
+ message-digest
+ regex
+ s11n
+ srfi-1
+ srfi-18
+ srfi-69
+ system-information
+ typed-records
+
+ pkts
+ ulex
+
+ commonmod
+ apimod
+ dbmod
+ debugprint
+ (prefix mtargs args:)
+ portloggermod
+ )
+
+;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048)
+
+;; info about me as a listener and my connections to db servers
+;; stored (for now) in *db-serv-info*
+;;
+(defstruct servdat
+ (host #f)
+ (port #f)
+ (uuid #f)
+ (dbfile #f)
+ (uconn #f) ;; this is the listener *FOR THIS PROCESS*
+ (mode #f)
+ (status 'starting)
+ (trynum 0) ;; count the number of ports we've tried
+ (conns (make-hash-table)) ;; apath/dbname => conndat
+ )
+
+(define *db-serv-info* (make-servdat))
+
+(define (servdat->url sdat)
+ (conc (servdat-host sdat)":"(servdat-port sdat)))
+
+;; db servers contact info
+;;
+(defstruct conndat
+ (apath #f)
+ (dbname #f)
+ (fullname #f)
+ (hostport #f)
+ (ipaddr #f)
+ (port #f)
+ (srvpkt #f)
+ (srvkey #f)
+ (lastmsg 0)
+ (expires 0))
+
+(define *srvpktspec*
+ `((server (host . h)
+ (port . p)
+ (servkey . k)
+ (pid . i)
+ (ipaddr . a)
+ (dbpath . d))))
+
+;;======================================================================
+;; S U P P O R T F U N C T I O N S
+;;======================================================================
+
+;; set up the api proc, seems like there should be a better place for this?
+;;
+;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE
+;;
+;; (define api-proc (make-parameter conc))
+;; (api-proc api:execute-requests)
+
+;; do we have a connection to apath dbname and
+;; is it not expired? then return it
+;;
+;; else setup a connection
+;;
+;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
+;;
+(define (rmt:get-conn remdat apath dbname)
+ (let* ((fullname (db:dbname->path apath dbname)))
+ (hash-table-ref/default (servdat-conns remdat) fullname #f)))
+
+(define (rmt:drop-conn remdat apath dbname)
+ (let* ((fullname (db:dbname->path apath dbname)))
+ (hash-table-delete! (servdat-conns remdat) fullname)))
+
+(define (rmt:find-main-server uconn apath dbname)
+ (let* ((pktsdir (get-pkts-dir apath))
+ (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
+ (viable-srvs (get-viable-servers all-srvpkts dbname)))
+ (get-the-server uconn apath viable-srvs)))
+
+
+(define *connstart-mutex* (make-mutex))
+(define *last-main-start* 0)
+
+;; looks for a connection to main, returns if have and not exired
+;; creates new otherwise
+;;
+;; connections for other servers happens by requesting from main
+;;
+;; TODO: This is unnecessarily re-creating the record in the hash table
+;;
+(define (rmt:open-main-connection remdat apath)
+ (let* ((fullpath (db:dbname->path apath ".db/main.db"))
+ (conns (servdat-conns remdat))
+ (conn (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this
+ (start-rmt:run (lambda ()
+ (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server")))
+ (thread-start! th1)
+ (thread-sleep! 1)
+ (let loop ((count 0))
+ (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection")
+ (if (or (not *db-serv-info*)
+ (not (servdat-uconn *db-serv-info*)))
+ (begin
+ (thread-sleep! 1)
+ (loop (+ count 1)))
+ (begin
+ (servdat-mode-set! *db-serv-info* 'non-db)
+ (servdat-uconn *db-serv-info*)))))))
+ (myconn (servdat-uconn *db-serv-info*)))
+ (cond
+ ((not myconn)
+ (start-rmt:run)
+ (rmt:open-main-connection remdat apath))
+ ((and conn ;; conn is NOT a socket, just saying ...
+ (< (current-seconds) (conndat-expires conn)))
+ #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died
+ ((and conn
+ (>= (current-seconds)(conndat-expires conn)))
+ (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
+ (rmt:drop-conn remdat apath ".db/main.db") ;;
+ (rmt:open-main-connection remdat apath))
+ (else
+ ;; Below we will find or create and connect to main
+ (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch")
+ (let* ((dbname (db:run-id->dbname #f))
+ (the-srv (rmt:find-main-server myconn apath dbname))
+ (start-main-srv (lambda () ;; call IF there is no the-srv found
+ (mutex-lock! *connstart-mutex*)
+ (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server
+ (begin
+ (api:run-server-process apath dbname)
+ (set! *last-main-start* (current-seconds))
+ (thread-sleep! 1))
+ (thread-sleep! 0.25))
+ (mutex-unlock! *connstart-mutex*)
+ (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries
+ )))
+ (if (not the-srv) ;; have server, try connecting to it
+ (start-main-srv)
+ (let* ((srv-addr (server-address the-srv)) ;; need serv
+ (ipaddr (alist-ref 'ipaddr the-srv))
+ (port (alist-ref 'port the-srv))
+ (srvkey (alist-ref 'servkey the-srv))
+ (fullpath (db:dbname->path apath dbname))
+
+ (new-the-srv (make-conndat
+ apath: apath
+ dbname: dbname
+ fullname: fullpath
+ hostport: srv-addr
+ ;; socket: (open-nn-connection srv-addr) - TODO - open ulex connection?
+ ipaddr: ipaddr
+ port: port
+ srvpkt: the-srv
+ srvkey: srvkey ;; generated by rmt:get-signature on the server side
+ lastmsg: (current-seconds)
+ expires: (+ (current-seconds)
+ (server:expiration-timeout)
+ -2) ;; this needs to be gathered during the ping
+ )))
+ (hash-table-set! conns fullpath new-the-srv)))
+ #t)))))
+
+;; NB// sinfo is a servdat struct
+;;
+(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5))
+ (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
+ (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable
+ (fullname (db:dbname->path apath dbname))
+ (conns (servdat-conns sinfo))
+ (mconn (rmt:get-conn sinfo apath ".db/main.db"))
+ (dconn (rmt:get-conn sinfo apath dbname)))
+ #;(if (and mconn
+ (not (debug:print-logger)))
+ (begin
+ (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
+ (debug:print-logger rmt:log-to-main)))
+ (cond
+ ((and mconn
+ dconn
+ (< (current-seconds)(conndat-expires dconn)))
+ #t) ;; good to go
+ ((not mconn) ;; no channel open to main? open it...
+ (rmt:open-main-connection sinfo apath)
+ (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+ ((not dconn) ;; no channel open to dbname?
+ (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname))))
+ (case res
+ ((server-started)
+ (if (> num-tries 0)
+ (begin
+ (thread-sleep! 2)
+ (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+ (begin
+ (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname)
+ (exit 1))))
+ (else
+ (if (list? res) ;; server has been registered and the info was returned. pass it on.
+ (begin ;; ("192.168.0.9" 53817
+ ;; "5e34239f48e8973b3813221e54701a01" "24310"
+ ;; "192.168.0.9"
+ ;; "/home/matt/data/megatest/tests/simplerun"
+ ;; ".db/1.db")
+ (match
+ res
+ ((host port servkey pid ipaddr apath dbname)
+ (debug:print-info 0 *default-log-port* "got "res)
+ (hash-table-set! conns
+ fullname
+ (make-conndat
+ apath: apath
+ dbname: dbname
+ hostport: (conc host":"port)
+ ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection?
+ ipaddr: ipaddr
+ port: port
+ srvkey: servkey
+ lastmsg: (current-seconds)
+ expires: (+ (current-seconds)
+ (server:expiration-timeout)
+ -2))))
+ (else
+ (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res)))
+ res)
+ (begin
+ (debug:print-info 0 *default-log-port* "Unexpected result: " res)
+ res)))))))
+ #t))
+
+;;======================================================================
+
+;; FOR DEBUGGING SET TO #t
+;; (define *localmode* #t)
+(define *localmode* #f)
+(define *dbstruct* (make-dbr:dbstruct))
+
+;; Defaults to current area
+;;
+(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
+ (let* ((apath *toppath*)
+ (sinfo *db-serv-info*)
+ (dbname (db:run-id->dbname rid)))
+ (if *localmode*
+ (api:execute-requests *dbstruct* cmd params)
+ (begin
+ (rmt:open-main-connection sinfo apath)
+ (if rid (rmt:general-open-connection sinfo apath dbname))
+ #;(if (not (member cmd '(log-to-main)))
+ (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params))
+ (rmt:send-receive-real sinfo apath dbname cmd params)))))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future
+;;
+(define (rmt:send-receive-real sinfo apath dbname cmd params)
+ (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.")
+ (let* ((cdat (rmt:get-conn sinfo apath dbname)))
+ (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
+ (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex
+ ;; then send-receive using the ulex layer to host-port stored in cdat
+ (res (send-receive uconn (conndat-hostport cdat) cmd params))
+ #;(th1 (make-thread (lambda ()
+ (set! res (send-receive uconn (conndat-hostport cdat) cmd params)))
+ "send-receive thread")))
+ ;; (thread-start! th1)
+ ;; (thread-join! th1) ;; gratuitious thread stuff is so that mailbox is not used in primordial thead
+ ;; since we accessed the server we can bump the expires time up
+ (conndat-expires-set! cdat (+ (current-seconds)
+ (server:expiration-timeout)
+ -10)) ;; ten second margin for network time misalignments etc.
+ res)))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future.
+;;
+;; Purpose - call the main.db server and request a server be started
+;; for the given area path and dbname
+;;
+
+(define (rmt:print-db-stats)
+ (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+ (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
+ (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+ (for-each (lambda (cmd)
+ (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+ (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+ (sort (hash-table-keys *db-stats*)
+ (lambda (a b)
+ (> (vector-ref (hash-table-ref *db-stats* a) 0)
+ (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+(define (rmt:get-max-query-average run-id)
+ (mutex-lock! *db-stats-mutex*)
+ (let* ((runkey (conc "run-id=" run-id " "))
+ (cmds (filter (lambda (x)
+ (substring-index runkey x))
+ (hash-table-keys *db-stats*)))
+ (res (if (null? cmds)
+ (cons 'none 0)
+ (let loop ((cmd (car cmds))
+ (tal (cdr cmds))
+ (max-cmd (car cmds))
+ (res 0))
+ (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+ (tot (vector-ref cmd-dat 0))
+ (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+ (currmax (max res curravg))
+ (newmax-cmd (if (> curravg res) cmd max-cmd)))
+ (if (null? tal)
+ (if (> tot 10)
+ (cons newmax-cmd currmax)
+ (cons 'none 0))
+ (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+ (mutex-unlock! *db-stats-mutex*)
+ res))
+
+;; host and port are used to ensure we are remove proper records
+(define (rmt:server-shutdown host port)
+ (let ((dbfile (servdat-dbfile *db-serv-info*)))
+ (debug:print-info 0 *default-log-port* "dbfile is "dbfile)
+ (if dbfile
+ (let* ((am-server (args:get-arg "-server"))
+ (dbfile (args:get-arg "-db"))
+ (apath *toppath*)
+ #;(sinfo *remotedat*)) ;; foundation for future fix
+ (if *dbstruct-db*
+ (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile))
+ (db (dbr:dbdat-db dbdat))
+ (inmem (dbr:dbdat-db dbdat)) ;; WRONG
+ )
+ ;; do a final sync here
+ (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
+ (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
+ ;; let's finalize here
+ (debug:print-info 0 *default-log-port* "Finalizing db and inmem")
+ (if (sqlite3:database? db)
+ (sqlite3:finalize! db)
+ (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
+ (if (sqlite3:database? inmem)
+ (sqlite3:finalize! inmem)
+ (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
+ (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete"))
+ (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do."))
+ (if (not am-server)
+ (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
+ (if (string-match ".*/main.db$" dbfile)
+ (let ((pkt-file (conc (get-pkts-dir *toppath*)
+ "/" (servdat-uuid *db-serv-info*)
+ ".pkt")))
+ (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
+ (delete-file* pkt-file)
+ (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
+ (db:with-lock-db
+ (servdat-dbfile *db-serv-info*)
+ (lambda (dbh dbfile)
+ (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
+ (let* ((sdat *db-serv-info*) ;; we have a run-id server
+ (host (servdat-host sdat))
+ (port (servdat-port sdat))
+ (uuid (servdat-uuid sdat))
+ (res (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile)))
+ (debug:print-info 0 *default-log-port* "deregistered-server, res="res)
+ (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
+ )))))))
+
+
+(define (common:run-sync?)
+ ;; (and (common:on-homehost?)
+ (args:get-arg "-server"))
+
+(define *rmt:run-mutex* (make-mutex))
+(define *rmt:run-flag* #f)
+
+;; Main entry point to start a server. was start-server
+(define (rmt:run hostn)
+ (mutex-lock! *rmt:run-mutex*)
+ (if *rmt:run-flag*
+ (begin
+ (debug:print-warn 0 *default-log-port* "rmt:run already running.")
+ (mutex-unlock! *rmt:run-mutex*))
+ (begin
+ (set! *rmt:run-flag* #t)
+ (mutex-unlock! *rmt:run-mutex*)
+ ;; ;; Configurations for server
+ ;; (tcp-buffer-size 2048)
+ ;; (max-connections 2048)
+ (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...")
+ (if (and *db-serv-info*
+ (servdat-uconn *db-serv-info*))
+ (let* ((uconn (servdat-uconn *db-serv-info*)))
+ (wait-and-close uconn))
+ (let* ((port (portlogger:open-run-close portlogger:find-port))
+ (handler-proc (lambda (rem-host-port qrykey cmd params) ;;
+ (set! *db-last-access* (current-seconds))
+ (assert (list? params) "FATAL: handler called with non-list params")
+ (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params)
+ (debug:print 0 *default-log-port* "handler call: "cmd", params="params)
+ (api:execute-requests *dbstruct-db* cmd params))))
+ ;; (api:process-request *dbstuct-db*
+ (if (not *db-serv-info*)
+ (set! *db-serv-info* (make-servdat host: hostn port: port)))
+ (let* ((uconn (run-listener handler-proc port))
+ (rport (udat-port uconn))) ;; the real port
+ (servdat-host-set! *db-serv-info* hostn)
+ (servdat-port-set! *db-serv-info* rport)
+ (servdat-uconn-set! *db-serv-info* uconn)
+ (wait-and-close uconn)
+ (db:print-current-query-stats)
+ )))
+ (let* ((host (servdat-host *db-serv-info*))
+ (port (servdat-port *db-serv-info*))
+ (mode (or (servdat-mode *db-serv-info*)
+ "non-db")))
+ ;; server exit stuff here
+ ;; (rmt:server-shutdown host port) - always do in on-exit
+ ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit
+ (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting")
+ ))))
+
+;;======================================================================
+;; S E R V E R U T I L I T I E S
+;;======================================================================
+
+
+;;======================================================================
+;; NEW SERVER METHOD
+;;======================================================================
+
+;; only use for main.db - need to re-write some of this :(
+;;
+(define (get-lock-db sdat dbfile host port)
+ (assert host "FATAL: get-lock-db called with host not set.")
+ (assert port "FATAL: get-lock-db called with port not set.")
+ (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
+ (res (db:get-iam-server-lock dbh dbfile host port))
+ (uconn (servdat-uconn sdat)))
+ ;; res => list then already locked, check server is responsive
+ ;; => #t then sucessfully got the lock
+ ;; => #f reserved for future use as to indicate something went wrong
+ (match res
+ ((owner_pid owner_host owner_port event_time)
+ (if (server-ready? uconn (conc owner_host":"owner_port) "abc")
+ #f ;; locked by someone else
+ (begin ;; locked by someone dead and gone
+ (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
+ (db:steal-lock-db dbh dbfile port))))
+ (#t #t) ;; placeholder so that we don't touch res if it is #t
+ (else (set! res #f)))
+ (sqlite3:finalize! dbh)
+ res))
+
+
+(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
+ (let* ((pkt-dat `((host . ,host)
+ (port . ,port)
+ (servkey . ,servkey)
+ (pid . ,(current-process-id))
+ (ipaddr . ,ipaddr)
+ (dbpath . ,dbpath)))
+ (uuid (write-alist->pkt
+ pkts-dir
+ pkt-dat
+ pktspec: pkt-spec
+ ptype: 'server)))
+ (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
+ uuid))
+
+(define (get-pkts-dir #!optional (apath #f))
+ (let* ((effective-toppath (or *toppath* apath)))
+ (assert effective-toppath
+ "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
+ (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
+ (if (file-exists? pdir)
+ pdir
+ (begin
+ (handle-exceptions ;; this exception handler should NOT be needed but ...
+ exn
+ pdir
+ (create-directory pdir #t))
+ pdir)))))
+
+;; given a pkts dir read
+;;
+(define (get-all-server-pkts pktsdir-in pktspec)
+ (let* ((pktsdir (if (file-exists? pktsdir-in)
+ pktsdir-in
+ (begin
+ (create-directory pktsdir-in #t)
+ pktsdir-in)))
+ (all-pkt-files (glob (conc pktsdir "/*.pkt"))))
+ (map (lambda (pkt-file)
+ (read-pkt->alist pkt-file pktspec: pktspec))
+ all-pkt-files)))
+
+(define (server-address srv-pkt)
+ (conc (alist-ref 'host srv-pkt) ":"
+ (alist-ref 'port srv-pkt)))
+
+(define (server-ready? uconn host-port key) ;; server-address is host:port
+ (let* ((params `((cmd . ping)(key . ,key)))
+ (data `((cmd . ping)
+ (key . ,key)
+ (params . ,params))) ;; I don't get it.
+ (res (send-receive uconn host-port 'ping data)))
+ (if (eq? res 'ack) ;; yep, likely it is who we want on the other end
+ res
+ #f)))
+;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f))))
+
+; from the pkts return servers associated with dbpath
+;; NOTE: Only one can be alive - have to check on each
+;; in the list of pkts returned
+;;
+(define (get-viable-servers serv-pkts dbpath)
+ (let loop ((tail serv-pkts)
+ (res '()))
+ (if (null? tail)
+ res ;; NOTE: sort by age so oldest is considered first
+ (let* ((spkt (car tail)))
+ (loop (cdr tail)
+ (if (equal? dbpath (alist-ref 'dbpath spkt))
+ (cons spkt res)
+ res))))))
+
+(define (remove-pkts-if-not-alive uconn serv-pkts)
+ (filter (lambda (pkt)
+ (let* ((host (alist-ref 'host pkt))
+ (port (alist-ref 'port pkt))
+ (host-port (conc host":"port))
+ (key (alist-ref 'servkey pkt))
+ (pktz (alist-ref 'Z pkt))
+ (res (server-ready? uconn host-port key)))
+ (if res
+ res
+ (let* ((pktsdir (get-pkts-dir *toppath*))
+ (pktpath (conc pktsdir"/"pktz".pkt")))
+ (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
+ (delete-file* pktpath)
+ #f))))
+ serv-pkts))
+
+;; from viable servers get one that is alive and ready
+;;
+(define (get-the-server uconn apath serv-pkts)
+ (let loop ((tail serv-pkts))
+ (if (null? tail)
+ #f
+ (let* ((spkt (car tail))
+ (host (alist-ref 'ipaddr spkt))
+ (port (alist-ref 'port spkt))
+ (host-port (conc host":"port))
+ (dbpth (alist-ref 'dbpath spkt))
+ (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt))
+ (addr (server-address spkt)))
+ (if (server-ready? uconn host-port srvkey)
+ spkt
+ (loop (cdr tail)))))))
+
+;; am I the "first" in line server? I.e. my D card is smallest
+;; use Z card as tie breaker
+;;
+(define (get-best-candidate serv-pkts dbpath)
+ (if (null? serv-pkts)
+ #f
+ (let loop ((tail serv-pkts)
+ (best (car serv-pkts)))
+ (if (null? tail)
+ best
+ (let* ((candidate (car tail))
+ (candidate-bd (string->number (alist-ref 'D candidate)))
+ (best-bd (string->number (alist-ref 'D best)))
+ ;; bigger number is younger
+ (candidate-z (alist-ref 'Z candidate))
+ (best-z (alist-ref 'Z best))
+ (new-best (cond
+ ((> best-bd candidate-bd) ;; best is younger than candidate
+ candidate)
+ ((< best-bd candidate-bd) ;; candidate is younger than best
+ best)
+ (else
+ (if (string>=? best-z candidate-z)
+ best
+ candidate))))) ;; use Z card as tie breaker
+ (if (null? tail)
+ new-best
+ (loop (cdr tail) new-best)))))))
+
+
+;;======================================================================
+;; END NEW SERVER METHOD
+;;======================================================================
+
+;; if .db/main.db check the pkts
+;;
+(define (rmt:wait-for-server pkts-dir db-file server-key)
+ (let* ((sdat *db-serv-info*))
+ (let loop ((start-time (current-seconds))
+ (changed #t)
+ (last-sdat "not this"))
+ (begin ;; let ((sdat #f))
+ (thread-sleep! 0.01)
+ (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+ (mutex-lock! *heartbeat-mutex*)
+ (set! sdat *db-serv-info*)
+ (mutex-unlock! *heartbeat-mutex*)
+ (if (and sdat
+ (not changed)
+ (> (- (current-seconds) start-time) 2))
+ (let* ((uconn (servdat-uconn sdat)))
+ (servdat-status-set! sdat 'iface-stable)
+ (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
+ ;; create a server pkt in *toppath*/.meta/srvpkts
+
+ ;; TODO:
+ ;; 1. change sdat to stuct
+ ;; 2. add uuid to struct
+ ;; 3. update uuid in sdat here
+ ;;
+ (servdat-uuid-set! sdat
+ (register-server
+ pkts-dir *srvpktspec*
+ (get-host-name)
+ (servdat-port sdat) server-key
+ (servdat-host sdat) db-file))
+ ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
+ ;; now read pkts and see if we are a contender
+ (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*))
+ (viables (get-viable-servers all-pkts db-file))
+ (alive (remove-pkts-if-not-alive uconn viables))
+ (best-srv (get-best-candidate alive db-file))
+ (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
+ (i-am-srv (equal? best-srv-key server-key))
+ (delete-pkt (lambda ()
+ (let* ((pktfile (conc (get-pkts-dir *toppath*)
+ "/" (servdat-uuid *db-serv-info*)
+ ".pkt")))
+ (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
+ (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
+ (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
+ ;; am I the best-srv, compare server-keys to know
+ (if i-am-srv
+ (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
+ (begin
+ (debug:print-info 0 *default-log-port* "I'm the server!")
+ (servdat-dbfile-set! sdat db-file)
+ (servdat-status-set! sdat 'db-locked))
+ (begin
+ (debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
+ (bdat-time-to-exit-set! *bdat* #t)
+ (delete-pkt)
+ (thread-sleep! 0.2)
+ (exit)))
+ (begin
+ (debug:print-info 0 *default-log-port*
+ "Keys do not match "best-srv-key", "server-key", exiting.")
+ (bdat-time-to-exit-set! *bdat* #t)
+ (delete-pkt)
+ (thread-sleep! 0.2)
+ (exit)))
+ sdat))
+ (begin ;; sdat not yet contains server info
+ (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+ (sleep 4)
+ (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+ (begin
+ (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+ (exit))
+ (loop start-time
+ (equal? sdat last-sdat)
+ sdat))))))))
+
+(define (rmt:register-server sinfo apath iface port server-key dbname)
+ (servdat-conns sinfo) ;; just checking types
+ (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+ (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath
+ (db:run-id->dbname #f)
+ 'register-server `(,iface
+ ,port
+ ,server-key
+ ,(current-process-id)
+ ,iface
+ ,apath
+ ,dbname)))
+
+(define (rmt:get-count-servers sinfo apath)
+ (servdat-conns sinfo) ;; just checking types
+ (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+ (rmt:send-receive-real sinfo apath ;; params: host port servkey pid ipaddr dbpath
+ (db:run-id->dbname #f)
+ 'get-count-servers `(,apath)))
+
+(define (rmt:get-servers-info apath)
+ (rmt:send-receive 'get-servers-info #f `(,apath)))
+
+(define (rmt:deregister-server db-serv-info apath iface port server-key dbname)
+ (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db
+ (rmt:send-receive-real db-serv-info apath ;; params: host port servkey pid ipaddr dbpath
+ (db:run-id->dbname #f)
+ 'deregister-server `(,iface
+ ,port
+ ,server-key
+ ,(current-process-id)
+ ,iface
+ ,apath
+ ,dbname)))
+
+(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100))
+ ;; wait until *db-serv-info* stops changing
+ (let* ((stime (current-seconds)))
+ (let loop ((last-host #f)
+ (last-port #f)
+ (tries 0))
+ (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*)))
+ (curr-port (and *db-serv-info* (servdat-port *db-serv-info*))))
+ ;; first we verify port and interface, update *db-serv-info* in need be.
+ (cond
+ ((> tries num-tries-allowed)
+ (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.")
+ (exit 1))
+ ((not *db-serv-info*)
+ (thread-sleep! 0.25)
+ (loop curr-host curr-port (+ tries 1)))
+ ((or (not last-host)(not last-port))
+ (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries)
+ (thread-sleep! 0.25)
+ (loop curr-host curr-port (+ tries 1)))
+ ((or (not (equal? last-host curr-host))
+ (not (equal? last-port curr-port)))
+ (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+ (thread-sleep! 0.25)
+ (loop curr-host curr-port (+ tries 1)))
+ ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed
+ (thread-sleep! 0.5)
+ (loop curr-host curr-port (+ tries 1)))
+ (else
+ (rmt:get-signature) ;; sets *my-signature* as side effect
+ (servdat-status-set! *db-serv-info* 'interface-stable)
+ (debug:print 0 *default-log-port*
+ "SERVER STARTED: " curr-host
+ ":" curr-port
+ " AT " (current-seconds) " server signature: " *my-signature*
+ " with "(servdat-trynum *db-serv-info*)" port changes")
+ (flush-output *default-log-port*)
+ #t))))))
+
+;; run rmt:keep-running in a parallel thread to monitor that the db is being
+;; used and to shutdown after sometime if it is not.
+;;
+(define (rmt:keep-running dbname)
+ ;; if none running or if > 20 seconds since
+ ;; server last used then start shutdown
+ ;; This thread waits for the server to come alive
+ (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+
+ (let* ((sinfo *db-serv-info*)
+ (server-start-time (current-seconds))
+ (pkts-dir (get-pkts-dir))
+ (server-key (rmt:get-signature)) ;; This servers key
+ (is-main (equal? (args:get-arg "-db") ".db/main.db"))
+ (last-access 0)
+ (server-timeout (server:expiration-timeout))
+ (shutdown-server-sequence (lambda (host port)
+ (set! *unclean-shutdown* #f) ;; Should not be needed anymore
+ (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+ ;; (rmt:server-shutdown host port) -- called in on-exit
+ ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit
+ (exit)))
+ (timed-out? (lambda ()
+ (<= (+ last-access server-timeout)
+ (current-seconds)))))
+ (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db"))
+ ;; main and run db servers have both got wait logic (could/should merge it)
+ (if is-main
+ (rmt:wait-for-server pkts-dir dbname server-key)
+ (rmt:wait-for-stable-interface))
+ ;; this is our forever loop
+ (let* ((iface (servdat-host *db-serv-info*))
+ (port (servdat-port *db-serv-info*))
+ (uconn (servdat-uconn *db-serv-info*)))
+ (let loop ((count 0)
+ (bad-sync-count 0)
+ (start-time (current-milliseconds)))
+ (if (and (not is-main)
+ (common:low-noise-print 60 "servdat-status"))
+ (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*)))
+
+ (mutex-lock! *heartbeat-mutex*)
+ ;; set up the database handle
+ (if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
+ (let ((watchdog (bdat-watchdog *bdat*)))
+ (debug:print 0 *default-log-port* "SERVER: dbprep")
+ (db:setup dbname) ;; sets *dbstruct-db* as side effect
+ (servdat-status-set! *db-serv-info* 'db-opened)
+ ;; IFF I'm not main, call into main and register self
+ (if (not is-main)
+ (let ((res (rmt:register-server sinfo
+ *toppath* iface port
+ server-key dbname)))
+ (if res ;; we are the server
+ (servdat-status-set! *db-serv-info* 'have-interface-and-db)
+ ;; now check that the db locker is alive, clear it out if not
+ (let* ((serv-info (rmt:server-info *toppath* dbname)))
+ (match serv-info
+ ((host port servkey pid ipaddr apath dbpath)
+ (if (not (server-ready? uconn (conc host":"port) servkey))
+ (begin
+ (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
+ (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
+ (loop (+ count 1) bad-sync-count start-time))))
+ (else
+ (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
+ (exit)))))))
+ (debug:print 0 *default-log-port*
+ "SERVER: running, db "dbname" opened, megatest version: "
+ (common:get-full-version))
+ ;; start the watchdog
+
+ ;; is this really needed?
+
+ #;(if watchdog
+ (if (not (member (thread-state watchdog)
+ '(ready running blocked
+ sleeping dead)))
+ (begin
+ (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
+ (thread-start! watchdog))
+ (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
+ (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
+ #;(loop (+ count 1) bad-sync-count start-time)
+ ))
+
+ (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
+
+ (mutex-unlock! *heartbeat-mutex*)
+
+ ;; when things go wrong we don't want to be doing the various
+ ;; queries too often so we strive to run this stuff only every
+ ;; four seconds or so.
+ (let* ((sync-time (- (current-milliseconds) start-time))
+ (rem-time (quotient (- 4000 sync-time) 1000)))
+ (if (and (<= rem-time 4)
+ (> rem-time 0))
+ (thread-sleep! rem-time)))
+
+ ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+ (set! last-access *db-last-access*)
+
+ (if (< count 1) ;; 3x3 = 9 secs aprox
+ (loop (+ count 1) bad-sync-count (current-milliseconds)))
+
+ (if (common:low-noise-print 60 "dbstats")
+ (begin
+ (debug:print 0 *default-log-port* "Server stats:")
+ (db:print-current-query-stats)))
+ (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
+ (cond
+ ((not *server-run*)
+ (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
+ (shutdown-server-sequence (get-host-name) port))
+ ((timed-out?)
+ (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+ (shutdown-server-sequence (get-host-name) port))
+ ((and *server-run*
+ (or (not (timed-out?))
+ (if is-main ;; do not exit if there are other servers (keep main open until all others gone)
+ (> (rmt:get-count-servers sinfo *toppath*) 1)
+ #f)))
+ (if (common:low-noise-print 120 "server continuing")
+ (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
+ (loop 0 bad-sync-count (current-milliseconds)))
+ (else
+ (set! *unclean-shutdown* #f)
+ (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+ (shutdown-server-sequence (get-host-name) port)
+ #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
+ (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown
+ (sexpr->string 'quit))))))))))
+
+(define (rmt:get-reasonable-hostname)
+ (let* ((inhost (or (args:get-arg "-server") "-")))
+ (if (equal? inhost "-")
+ (get-host-name)
+ inhost)))
+
+;; Call this to start the actual server
+;;
+;; all routes though here end in exit ...
+;;
+;; This is the point at which servers are started
+;;
+(define (rmt:server-launch dbname)
+ (debug:print-info 0 *default-log-port* "Entered rmt:server-launch")
+ (let* ((th2 (make-thread (lambda ()
+ (debug:print-info 0 *default-log-port* "Server run thread started")
+ (rmt:run (rmt:get-reasonable-hostname)))
+ "Server run"))
+ (th3 (make-thread (lambda ()
+ (debug:print-info 0 *default-log-port* "Server monitor thread started")
+ (if (args:get-arg "-server")
+ (rmt:keep-running dbname)))
+ "Keep running")))
+ (thread-start! th2)
+ (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
+ (thread-start! th3)
+ (set! *didsomething* #t)
+ (thread-join! th2)
+ (thread-join! th3))
+ #f)
+
+;;======================================================================
+;; S E R V E R - D I R E C T C A L L S
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+ (rmt:send-receive 'kill-server #f (list run-id)))
+
+(define (rmt:start-server run-id)
+ (rmt:send-receive 'start-server #f (list run-id)))
+
+(define (rmt:server-info apath dbname)
+ (rmt:send-receive 'get-server-info #f (list apath dbname)))
+
+;;======================================================================
+;; Nanomsg transport
+;;======================================================================
+
+#;(define (is-port-in-use port-num)
+ (let* ((ret #f))
+ (let-values (((inp oup pid)
+ (process "netstat" (list "-tulpn" ))))
+ (let loop ((inl (read-line inp)))
+ (if (not (eof-object? inl))
+ (begin
+ (if (string-search (regexp (conc ":" port-num)) inl)
+ (begin
+ ;(print "Output: " inl)
+ (set! ret #t))
+ (loop (read-line inp)))))))
+ ret))
+
+#;(define (open-nn-connection host-port)
+ (let ((req (make-req-socket))
+ (uri (conc "tcp://" host-port)))
+ (nng-dial req uri)
+ (socket-set! req 'nng/recvtimeo 2000)
+ req))
+
+#;(define (send-receive-nn req msg)
+ (nng-send req msg)
+ (nng-recv req))
+
+#;(define (close-nn-connection req)
+ (nng-close! req))
+
+;; ;; open connection to server, send message, close connection
+;; ;;
+;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+;; (let ((req (make-req-socket 'req))
+;; (uri (conc "tcp://" host-port))
+;; (res #f)
+;; ;; (contacts (alist-ref 'contact attrib))
+;; ;; (mode (alist-ref 'mode attrib))
+;; )
+;; (socket-set! req 'nng/recvtimeo 2000)
+;; (handle-exceptions
+;; exn
+;; (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+;; ;; Send notification
+;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" )
+;; #f)
+;; (nng-dial req uri)
+;; ;; (print "Connected to the server " )
+;; (nng-send req msg)
+;; ;; (print "Request Sent")
+;; (let* ((th1 (make-thread (lambda ()
+;; (let ((resp (nng-recv req)))
+;; (nng-close! req)
+;; (set! res (if (equal? resp "ok")
+;; #t
+;; #f))))
+;; "recv thread"))
+;; (th2 (make-thread (lambda ()
+;; (thread-sleep! timeout)
+;; (thread-terminate! th1))
+;; "timer thread")))
+;; (thread-start! th1)
+;; (thread-start! th2)
+;; (thread-join! th1)
+;; res))))
+;;
+#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+ (let ((req (make-req-socket))
+ (uri (conc "tcp://" host-port))
+ (res #f))
+ (handle-exceptions
+ exn
+ (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+ ;; Send notification
+ (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn)
+ #f)
+ (nng-dial req uri)
+ (nng-send req msg)
+ (let* ((th1 (make-thread (lambda ()
+ (let ((resp (nng-recv req)))
+ (nng-close! req)
+ ;; (print resp)
+ (set! res resp)))
+ "recv thread"))
+ (th2 (make-thread (lambda ()
+ (thread-sleep! timeout)
+ (thread-terminate! th1))
+ "timer thread")))
+ (thread-start! th1)
+ (thread-start! th2)
+ (thread-join! th1)
+ res))))
+
+;;======================================================================
+;; S E R V E R U T I L I T I E S
+;;======================================================================
+
+;; run ping in separate process, safest way in some cases
+;;
+#;(define (server:ping-server ifaceport)
+ (with-input-from-pipe
+ (conc (common:get-megatest-exe) " -ping " ifaceport)
+ (lambda ()
+ (let loop ((inl (read-line))
+ (res "NOREPLY"))
+ (if (eof-object? inl)
+ (case (string->symbol res)
+ ((NOREPLY) #f)
+ ((LOGIN_OK) #t)
+ (else #f))
+ (loop (read-line) inl))))))
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;;
+#;(define (server:login toppath)
+ (lambda (toppath)
+ (set! *db-last-access* (current-seconds)) ;; might not be needed.
+ (if (equal? *toppath* toppath)
+ #t
+ #f)))
+
+;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; (define (server:release-sync-lock)
+;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; (define (server:have-sync-lock?)
+;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;; (have-lock? (car have-lock-pair))
+;; (lock-time (cdr have-lock-pair))
+;; (lock-age (- (current-seconds) lock-time)))
+;; (cond
+;; (have-lock? #t)
+;; ((>lock-age
+;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;; (server:release-sync-lock)
+;; (server:have-sync-lock?))
+;; (else #f))))
+
+)
Index: ulex/ulex.scm
==================================================================
--- ulex/ulex.scm
+++ ulex/ulex.scm
@@ -1,8 +1,8 @@
;; ulex: Distributed sqlite3 db
;;;
-;; Copyright (C) 2018 Matt Welland
+;; Copyright (C) 2018-2021 Matt Welland
;; Redistribution and use in source and binary forms, with or without
;; modification, is permitted.
;;
;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -23,330 +23,521 @@
;; NOTES:
;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity.
;;
;;======================================================================
-(use mailbox)
-
-(module ulex
- *
-
-(import scheme posix chicken data-structures ports extras files mailbox)
-(import srfi-18 pkts matchable regex
- typed-records srfi-69 srfi-1
- srfi-4 regex-case
- (prefix sqlite3 sqlite3:)
- foreign
- tcp6
- ;; ulex-netutil
- hostinfo
- )
-
-;; make it a global? Well, it is local to area module
-
-(define *captain-pktspec*
- `((captain (host . h)
- (port . p)
- (pid . i)
- (ipaddr . a)
- )
- #;(data (hostname . h) ;; sender hostname
- (port . p) ;; sender port
- (ipaddr . a) ;; sender ip
- (hostkey . k) ;; sending host key - store info at server under this key
- (servkey . s) ;; server key - this needs to match at server end or reject the msg
- (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json
- (data . d) ;; base64 encoded slln data
- )))
-
-;; struct for keeping track of our world
-
-(defstruct udat
- ;; captain info
- (captain-address #f)
- (captain-host #f)
- (captain-port #f)
- (captain-pid #f)
- (captain-lease 0) ;; time (unix epoc) seconds when the lease is up
- (ulex-dir (conc (get-environment-variable "HOME") "/.ulex"))
- (cpkts-dir (conc (get-environment-variable "HOME") "/.ulex/pkts"))
- (cpkt-spec *captain-pktspec*)
- ;; this processes info
- (my-cpkt-key #f) ;; put Z card here when I create a pkt for myself as captain
- (my-address #f)
- (my-hostname #f)
- (my-port #f)
- (my-pid (current-process-id))
- (my-dbs '())
- ;; server and handler thread
- (serv-listener #f) ;; this processes server info
- (handler-thread #f)
- (mboxes (make-hash-table)) ;; key => mbox
- ;; other servers
- (peers (make-hash-table)) ;; host-port => peer record
- (dbowners (make-hash-table)) ;; dbfile => host-port
- (handlers (make-hash-table)) ;; dbfile => proc
- ;; (outgoing-conns (make-hash-table)) ;; host:port -> conn
- (work-queue (make-queue)) ;; most stuff goes here
- ;; (fast-queue (make-queue)) ;; super quick stuff goes here (e.g. ping)
- (busy #f) ;; is either of the queues busy, use to switch between queuing tasks or doing immediately
- ;; app info
- (appname #f)
- (dbtypes (make-hash-table)) ;; this should be an alist but hash is easier. dbtype => [ initproc syncproc ]
- ;; cookies
- (cnum 0) ;; cookie num
- )
-
-;;======================================================================
-;; NEW APPROACH
-;;======================================================================
-
-;; start-server-find-port ;; gotta have a server port ready from the very begining
-
-;; udata - all the connection info, captain, server, ulex db etc. MUST BE PASSED IN
-;; dbpath - full path and filename of the db to talk to or a symbol naming the db?
-;; callname - the remote call to execute
-;; params - parameters to pass to the remote call
-;;
-(define (remote-call udata dbpath dbtype callname . params)
- (start-server-find-port udata) ;; ensure we have a local server
- (find-or-setup-captain udata)
- ;; look at connect, process-request, send, send-receive
- (let-values (((cookie-key host-port)(get-db-owner udata dbpath dbtype)))
- (send-receive udata host-port callname cookie-key params)))
-
-;;======================================================================
-;; KEY FUNCTIONS - THESE ARE TOO BE EXPOSED AND USED
-;;======================================================================
-
-;; connection setup and management functions
-
-;; This is the basic setup command. Must always be
-;; called before connecting to a db using connect.
-;;
-;; find or become the captain
-;; setup and return a ulex object
-;;
-(define (find-or-setup-captain udata)
- ;; see if we already have a captain and if the lease is ok
- (if (and (udat-captain-address udata)
- (udat-captain-port udata)
- (< (current-seconds) (udat-captain-lease udata)))
- udata
- (let* ((cpkts (get-all-captain-pkts udata)) ;; read captain pkts
- (captn (get-winning-pkt cpkts)))
- (if captn
- (let* ((port (alist-ref 'port captn))
- (host (alist-ref 'host captn))
- (ipaddr (alist-ref 'ipaddr captn))
- (pid (alist-ref 'pid captn))
- (Z (alist-ref 'Z captn)))
- (udat-captain-address-set! udata ipaddr)
- (udat-captain-host-set! udata host)
- (udat-captain-port-set! udata port)
- (udat-captain-pid-set! udata pid)
- (udat-captain-lease-set! udata (+ (current-seconds) 10))
- (let-values (((success pingtime)(ping udata (conc ipaddr ":" port))))
- (if success
- udata
- (begin
- (print "Found unreachable captain at " ipaddr ":" port ", removing pkt")
- (remove-captain-pkt udata captn)
- (find-or-setup-captain udata))))
- (begin
- (setup-as-captain udata) ;; this saves the thread to captain-thread and starts the thread
- (find-or-setup-captain udata)))))))
-
-;; connect to a specific dbfile
-;; - if already connected - return the dbowner host-port
-;; - ask the captain who to talk to for this db
-;; - put the entry in the dbowners hash as dbfile => host-port
-;;
-(define (connect udata dbfname dbtype)
- (or (hash-table-ref/default (udat-dbowners udata) dbfname #f)
- (let-values (((success dbowner-host-port)(get-db-owner udata dbfname dbtype)))
- (if success
- (begin
- ;; just clobber the record, this is the new data no matter what
- (hash-table-set! (udat-dbowners udata) dbfname dbowner-host-port)
- dbowner-host-port)
- #f))))
-
-;; returns: success pingtime
-;;
-;; NOTE: causes the callee to store the info on this host along with the dbs this host currently owns
-;;
-(define (ping udata host-port)
- (let* ((start (current-milliseconds))
- (cookie (make-cookie udata))
- (dbs (udat-my-dbs udata))
- (msg (string-intersperse dbs " "))
- (res (send udata host-port 'ping cookie msg retval: #t))
- (delta (- (current-milliseconds) start)))
- (values (equal? res cookie) delta)))
-
-;; returns: success pingtime
-;;
-;; NOTE: causes all references to this worker to be wiped out in the
-;; callee (ususally the captain)
-;;
-(define (goodbye-ping udata host-port)
- (let* ((start (current-milliseconds))
- (cookie (make-cookie udata))
- (dbs (udat-my-dbs udata))
- (res (send udata host-port 'goodbye cookie "nomsg" retval: #t))
- (delta (- (current-milliseconds) start)))
- (values (equal? res cookie) delta)))
-
-(define (goodbye-captain udata)
- (let* ((host-port (udat-captain-host-port udata)))
- (if host-port
- (goodbye-ping udata host-port)
- (values #f -1))))
-
-(define (get-db-owner udata dbname dbtype)
- (let* ((host-port (udat-captain-host-port udata)))
- (if host-port
- (let* ((cookie (make-cookie udata))
- (msg #f) ;; (conc dbname " " dbtype))
- (params `(,dbname ,dbtype))
- (res (send udata host-port 'db-owner cookie msg
- params: params retval: #t)))
- (match (string-split res)
- ((retcookie owner-host-port)
- (values (equal? retcookie cookie) owner-host-port))))
- (values #f -1))))
-
-;; called in ulex-handler to dispatch work, called on the workers side
-;; calls (proc params data)
-;; returns result with cookie
-;;
-;; pdat is the info of the caller, used to send the result data
-;; prockey is key into udat-handlers hash dereferencing a proc
-;; procparam is a first param handed to proc - often to do further derefrencing
-;; NOTE: params is intended to be a list of strings, encoding on data
-;; is up to the user but data must be a single line
-;;
-(define (process-request udata pdat dbname cookie prockey procparam data)
- (let* ((dbrec (ulex-open-db udata dbname)) ;; this will be a dbconn record, looks for in udata first
- (proc (hash-table-ref udata prockey)))
- (let* ((result (proc dbrec procparam data)))
- result)))
-
-;; remote-request - send to remote to process in process-request
-;; uconn comes from a call to connect and can be used instead of calling connect again
-;; uconn is the host-port to call
-;; we send dbname to the worker so they know which file to open
-;; data must be a string with no newlines, it will be handed to the proc
-;; at the remote site unchanged. It is up to the user to encode/decode it's contents
-;;
-;; rtype: immediate, read-only, normal, low-priority
-;;
-(define (remote-request udata uconn rtype dbname prockey procparam data)
- (let* ((cookie (make-cookie udata)))
- (send-receive udata uconn rtype cookie data `(,prockey procparam))))
-
-(define (ulex-open-db udata dbname)
- #f)
-
-
-;;======================================================================
-;; Ulex db
-;;
-;; - track who is captain, lease expire time
-;; - track who owns what db, lease
-;;
-;;======================================================================
-
-;;
-;;
-(define (ulex-dbfname)
- (let ((dbdir (conc (get-environment-variable "HOME") "/.ulex")))
- (if (not (file-exists? dbdir))
- (create-directory dbdir #t))
- (conc dbdir "/network.db")))
-
-;; always goes in ~/.ulex/network.db
-;; role is captain, adjutant, node
-;;
-(define (ulexdb-setup)
- (let* ((dbfname (ulex-dbfname))
- (have-db (file-exists? dbfname))
- (db (sqlite3:open-database dbfname)))
- (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
- (sqlite3:execute db "PRAGMA synchronous = 0;")
- (if (not have-db)
- (sqlite3:with-transaction
- db
- (lambda ()
- (for-each
- (lambda (stmt)
- (if stmt (sqlite3:execute db stmt)))
- `("CREATE TABLE IF NOT EXISTS nodes
- (id INTEGER PRIMARY KEY,
- role TEXT NOT NULL,
- host TEXT NOT NULL,
- port TEXT NOT NULL,
- ipadr TEXT NOT NULL,
- pid INTEGER NOT NULL,
- zcard TEXT NOT NULL,
- regtime INTEGER DEFAULT (strftime('%s','now')),
- lease_thru INTEGER DEFAULT (strftime('%s','now')),
- last_update INTEGER DEFAULT (strftime('%s','now')));"
- "CREATE TRIGGER IF NOT EXISTS update_nodes_trigger AFTER UPDATE ON nodes
- FOR EACH ROW
- BEGIN
- UPDATE nodes SET last_update=(strftime('%s','now'))
- WHERE id=old.id;
- END;"
- "CREATE TABLE IF NOT EXISTS dbs
- (id INTEGER PRIMARY KEY,
- dbname TEXT NOT NULL,
- dbfile TEXT NOT NULL,
- dbtype TEXT NOT NULL,
- host_port TEXT NOT NULL,
- regtime INTEGER DEFAULT (strftime('%s','now')),
- lease_thru INTEGER DEFAULT (strftime('%s','now')),
- last_update INTEGER DEFAULT (strftime('%s','now')));"
- "CREATE TRIGGER IF NOT EXISTS update_dbs_trigger AFTER UPDATE ON dbs
- FOR EACH ROW
- BEGIN
- UPDATE dbs SET last_update=(strftime('%s','now'))
- WHERE id=old.id;
- END;")))))
- db))
-
-(define (get-host-port-lease db dbfname)
- (sqlite3:fold-row
- (lambda (rem host-port lease-thru)
- (list host-port lease-thru))
- #f db "SELECT host_port,lease_thru FROM dbs WHERE dbfile = ?" dbfname))
-
-(define (register-captain db host ipadr port pid zcard #!key (lease 20))
- (let* ((dbfname (ulex-dbfname))
- (host-port (conc host ":" port)))
- (sqlite3:with-transaction
- db
- (lambda ()
- (match (get-host-port-lease db dbfname)
- ((host-port lease-thru)
- (if (> (current-seconds) lease-thru)
- (begin
- (sqlite3:execute db "UPDATE dbs SET host_port=?,lease_thru=? WHERE dbname=?"
- (conc host ":" port)
- (+ (current-seconds) lease)
- dbfname)
- #t)
- #f))
- (#f (sqlite3:execute db "INSERT INTO dbs (dbname,dbfile,dbtype,host_port,lease_thru) VALUES (?,?,?,?,?)"
- "captain" dbfname "captain" host-port (+ (current-seconds) lease)))
- (else (print "ERROR: Unrecognised result from fold-row")
- (exit 1)))))))
-
+(module ulex
+ *
+ #;(
+
+ ;; NOTE: looking for the handler proc - find the run-listener :)
+
+ run-listener ;; (run-listener handler-proc [port]) => uconn
+
+ ;; NOTE: handler-proc params;
+ ;; (handler-proc rem-host-port qrykey cmd params)
+
+ send-receive ;; (send-receive uconn host-port cmd data)
+
+ ;; NOTE: cmd can be any plain text symbol except for these;
+ ;; 'ping 'ack 'goodbye 'response
+
+ set-work-handler ;; (set-work-handler proc)
+
+ wait-and-close ;; (wait-and-close uconn)
+
+ ulex-listener?
+
+ ;; needed to get the interface:port that was automatically found
+ udat-port
+ udat-host-port
+
+ ;; for testing only
+ ;; pp-uconn
+
+ ;; parameters
+ work-method ;; parameter; 'threads, 'mailbox, 'limited, 'direct
+ return-method ;; parameter; 'mailbox, 'polling, 'direct
+ )
+
+(import scheme
+ chicken.base
+ chicken.file
+ chicken.io
+ chicken.time
+ chicken.condition
+ chicken.string
+ chicken.sort
+ chicken.pretty-print
+
+ address-info
+ mailbox
+ matchable
+ ;; queues
+ regex
+ regex-case
+ simple-exceptions
+ s11n
+ srfi-1
+ srfi-18
+ srfi-4
+ srfi-69
+ system-information
+ tcp6
+ typed-records
+ )
+
+;; udat struct, used by both caller and callee
+;; instantiated as uconn by convention
+;;
+(defstruct udat
+ ;; the listener side
+ (port #f)
+ (host-port #f)
+ (socket #f)
+ ;; the peers
+ (peers (make-hash-table)) ;; host:port->peer
+ ;; work handling
+ (work-queue (make-mailbox))
+ (work-proc #f) ;; set by user
+ (cnum 0) ;; cookie number
+ (mboxes (make-hash-table)) ;; for the replies
+ (avail-cmboxes '()) ;; list of ( . ) for re-use
+ ;; threads
+ (numthreads 10)
+ (cmd-thread #f)
+ (work-queue-thread #f)
+ (num-threads-running 0)
+ )
+
+;; Parameters
+
+;; work-method:
+(define work-method (make-parameter 'mailbox))
+;; mailbox - all rdat goes through mailbox
+;; threads - all rdat immediately executed in new thread
+;; direct - no queuing
+;;
+
+;; return-method, return the result to waiting send-receive:
+(define return-method (make-parameter 'mailbox))
+;; mailbox - create a mailbox and use it for passing returning results to send-receive
+;; polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result
+;; direct - no queuing, result is passed back in single tcp connection
+;;
+
+;; ;; struct for keeping track of others we are talking to
+;; ;;
+;; (defstruct pdat
+;; (host-port #f)
+;; (conns '()) ;; list of pcon structs, pop one off when calling the peer
+;; )
+;;
+;; ;; struct for peer connections, keep track of expiration etc.
+;; ;;
+;; (defstruct pcon
+;; (inp #f)
+;; (oup #f)
+;; (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59)
+;; (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes
+;; )
+
+;;======================================================================
+;; listener
+;;======================================================================
+
+;; is uconn a ulex connector (listener)
+;;
+(define (ulex-listener? uconn)
+ (udat? uconn))
+
+;; create a tcp listener and return a populated udat struct with
+;; my port, address, hostname, pid etc.
+;; return #f if fail to find a port to allocate.
+;;
+;; if udata-in is #f create the record
+;; if there is already a serv-listener return the udata
+;;
+(define (setup-listener uconn #!optional (port 4242))
+ (handle-exceptions
+ exn
+ (if (< port 65535)
+ (setup-listener uconn (+ port 1))
+ #f)
+ (connect-listener uconn port)))
+
+(define (connect-listener uconn port)
+ ;; (tcp-listener-socket LISTENER)(socket-name so)
+ ;; sockaddr-address, sockaddr-port, sockaddr->string
+ (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
+ (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
+ (udat-port-set! uconn port)
+ (udat-host-port-set! uconn (conc addr":"port))
+ (udat-socket-set! uconn tlsn)
+ uconn))
+
+;; run-listener does all the work of starting a listener in a thread
+;; it then returns control
+;;
+(define (run-listener handler-proc #!optional (port-suggestion 4242))
+ (let* ((uconn (make-udat)))
+ (udat-work-proc-set! uconn handler-proc)
+ (if (setup-listener uconn port-suggestion)
+ (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop"))
+ (th2 (make-thread (lambda ()
+ (case (work-method)
+ ((mailbox limited)
+ (process-work-queue uconn))))
+ "Ulex work queue processor")))
+ ;; (tcp-buffer-size 2048)
+ (thread-start! th1)
+ (thread-start! th2)
+ (udat-cmd-thread-set! uconn th1)
+ (udat-work-queue-thread-set! uconn th2)
+ (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".")
+ uconn)
+ (assert #f "ERROR: run-listener called without proper setup."))))
+
+(define (wait-and-close uconn)
+ (thread-join! (udat-cmd-thread uconn))
+ (tcp-close (udat-socket uconn)))
+
+;;======================================================================
+;; peers and connections
+;;======================================================================
+
+(define *send-mutex* (make-mutex))
+
+;; send structured data to recipient
+;;
+;; NOTE: qrykey is what was called the "cookie" previously
+;;
+;; retval tells send to expect and wait for return data (one line) and return it or time out
+;; this is for ping where we don't want to necessarily have set up our own server yet.
+;;
+;; NOTE: see below for beginnings of code to allow re-use of tcp connections
+;; - I believe (without substantial evidence) that re-using connections will
+;; be beneficial ...
+;;
+(define (send udata host-port qrykey cmd params)
+ (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this
+ (isme #f #;(equal? host-port my-host-port)) ;; calling myself?
+ ;; dat is a self-contained work block that can be sent or handled locally
+ (dat (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
+ (cond
+ (isme (ulex-handler udata dat)) ;; no transmission needed
+ (else
+ (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
+ exn
+ (message exn)
+ (begin
+ ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ (let-values (((inp oup)(tcp-connect host-port)))
+ (let ((res (if (and inp oup)
+ (begin
+ (serialize dat oup)
+ (close-output-port oup)
+ (deserialize inp)
+ )
+ (begin
+ (print "ERROR: send called but no receiver has been setup. Please call setup first!")
+ #f))))
+ (close-input-port inp)
+ ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ res)))))))) ;; res will always be 'ack unless return-method is direct
+
+(define (send-via-polling uconn host-port cmd data)
+ (let* ((qrykey (make-cookie uconn))
+ (sres (send uconn host-port qrykey cmd data)))
+ (case sres
+ ((ack)
+ (let loop ((start-time (current-milliseconds)))
+ (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
+ (begin
+ (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
+ #f)
+ (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
+ (if result ;; result is '(status . result-data) or #f for nothing yet
+ (begin
+ (hash-table-delete! (udat-mboxes uconn) qrykey)
+ (cdr result))
+ (begin
+ (thread-sleep! 0.01)
+ (loop start-time)))))))
+ (else
+ (print "ULEX ERROR: Communication failed? sres="sres)
+ #f))))
+
+(define (send-via-mailbox uconn host-port cmd data)
+ (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
+ (qrykey (car cmbox))
+ (mbox (cdr cmbox))
+ (mbox-time (current-milliseconds))
+ (sres (send uconn host-port qrykey cmd data))) ;; short res
+ (if (eq? sres 'ack) ;; BUG: change to be less than server:expiration-timeout?
+ (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread)))
+ #f
+ 120)) ;; timeout)
+ (mbox-timeout-result 'MBOX_TIMEOUT)
+ (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
+ (mbox-receive-time (current-milliseconds)))
+ ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
+ (hash-table-delete! (udat-mboxes uconn) qrykey)
+ (if (eq? res 'MBOX_TIMEOUT)
+ (begin
+ (print "WARNING: mbox timed out for query "cmd", with data "data
+ ", waiting for response from "host-port".")
+
+ ;; here it might make sense to clean up connection records and force clean start?
+ ;; NO. The progam using ulex needs to do the reset. Right thing here is exception
+
+ #f) ;; convert to raising exception?
+ res))
+ (begin
+ (print "ERROR: Communication failed? Got "sres)
+ #f))))
+
+;; send a request to the given host-port and register a mailbox in udata
+;; wait for the mailbox data and return it
+;;
+(define (send-receive uconn host-port cmd data)
+ (let* ((start-time (current-milliseconds))
+ (result (cond
+ ((member cmd '(ping goodbye)) ;; these are immediate
+ (send uconn host-port 'ping cmd data))
+ ((eq? (work-method) 'direct)
+ ;; the result from send will be the actual result, not an 'ack
+ (send uconn host-port 'direct cmd data))
+ (else
+ (case (return-method)
+ ((polling)
+ (send-via-polling uconn host-port cmd data))
+ ((mailbox)
+ (send-via-mailbox uconn host-port cmd data))
+ (else
+ (print "ULEX ERROR: unrecognised return-method "(return-method)".")
+ #f)))))
+ (duration (- (current-milliseconds) start-time)))
+ ;; this is ONLY for development and debugging. It will be removed once Ulex is stable.
+ (if (< 5000 duration)
+ (print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000)))
+ " seconds; "cmd", host-port="host-port", data="data))
+ result))
+
+
+;;======================================================================
+;; responder side
+;;======================================================================
+
+;; take a request, rdat, and if not immediate put it in the work queue
+;;
+;; Reserved cmds; ack ping goodbye response
+;;
+(define (ulex-handler uconn rdat)
+ (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
+ (match rdat ;; (string-split controldat)
+ ((rem-host-port qrykey cmd params);; timedata)
+ ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
+ (case cmd
+ ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
+ ((ping)
+ ;; (print "Got Ping!")
+ ;; (add-to-work-queue uconn rdat)
+ 'ack)
+ ((goodbye)
+ ;; just clear out references to the caller. NOT COMPLETE
+ (add-to-work-queue uconn rdat)
+ 'ack)
+ ((response) ;; this is a result from remote processing, send it as mail ...
+ (case (return-method)
+ ((polling)
+ (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params))
+ 'ack)
+ ((mailbox)
+ (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f)))
+ (if mbox
+ (begin
+ (mailbox-send! mbox params) ;; params here is our result
+ 'ack)
+ (begin
+ (print "ERROR: received result but no associated mbox for cookie "qrykey)
+ 'no-mbox-found))))
+ (else (print "ULEX ERROR: unrecognised return-method "(return-method))
+ 'bad-return-method)))
+ (else ;; generic request - hand it to the work queue
+ (add-to-work-queue uconn rdat)
+ 'ack)))
+ (else
+ (print "ULEX ERROR: bad rdat "rdat)
+ 'bad-rdat)))
+
+;; given an already set up uconn start the cmd-loop
+;;
+(define (ulex-cmd-loop uconn)
+ (let* ((serv-listener (udat-socket uconn))
+ (listener (lambda ()
+ (let loop ((state 'start))
+ (let-values (((inp oup)(tcp-accept serv-listener)))
+ ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params)
+ (resp (ulex-handler uconn rdat)))
+ (serialize resp oup)
+ (close-input-port inp)
+ (close-output-port oup)
+ ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+ )
+ (loop state))))))
+ ;; start N of them
+ (let loop ((thnum 0)
+ (threads '()))
+ (if (< thnum 100)
+ (let* ((th (make-thread listener (conc "listener" thnum))))
+ (thread-start! th)
+ (loop (+ thnum 1)
+ (cons th threads)))
+ (map thread-join! threads)))))
+
+;; add a proc to the cmd list, these are done symetrically (i.e. in all instances)
+;; so that the proc can be dereferenced remotely
+;;
+(define (set-work-handler uconn proc)
+ (udat-work-proc-set! uconn proc))
+
+;;======================================================================
+;; work queues - this is all happening on the listener side
+;;======================================================================
+
+;; rdat is (rem-host-port qrykey cmd params)
+
+(define (add-to-work-queue uconn rdat)
+ #;(queue-add! (udat-work-queue uconn) rdat)
+ (case (work-method)
+ ((threads)
+ (thread-start! (make-thread (lambda ()
+ (do-work uconn rdat))
+ "worker thread")))
+ ((mailbox)
+ (mailbox-send! (udat-work-queue uconn) rdat))
+ ((direct)
+ (do-work uconn rdat))
+ (else
+ (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.")
+ (mailbox-send! (udat-work-queue uconn) rdat))))
+
+;; move the logic to return the result somewhere else?
+;;
+(define (do-work uconn rdat)
+ (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change
+ ;; put this following into a do-work procedure
+ (match rdat
+ ((rem-host-port qrykey cmd params)
+ (let* ((start-time (current-milliseconds))
+ (result (proc rem-host-port qrykey cmd params))
+ (end-time (current-milliseconds))
+ (run-time (- end-time start-time)))
+ (case (work-method)
+ ((direct) result)
+ (else
+ (if (> run-time 1000)(print "ULEX: Warning, work "cmd", "params" done in "run-time" ms"))
+ ;; send 'response as cmd and result as params
+ (send uconn rem-host-port qrykey 'response result) ;; could check for ack
+ (let* ((duration (- (current-milliseconds) end-time)))
+ (if (> duration 500)(print "ULEX: Warning, response sent back to "rem-host-port" for "qrykey" in "duration"ms")))))))
+ (MBOX_TIMEOUT 'do-work-timeout)
+ (else
+ (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))))
+
+;; NEW APPROACH:
+;;
+(define (process-work-queue uconn)
+ (let ((wqueue (udat-work-queue uconn))
+ (proc (udat-work-proc uconn))
+ (numthr (udat-numthreads uconn)))
+ (let loop ((thnum 1)
+ (threads '()))
+ (let ((thlst (cons (make-thread (lambda ()
+ (let work-loop ()
+ (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT)))
+ (do-work uconn rdat))
+ (work-loop)))
+ (conc "work thread " thnum))
+ threads)))
+ (if (< thnum numthr)
+ (loop (+ thnum 1)
+ thlst)
+ (begin
+ (print "ULEX: Starting "(length thlst)" worker threads.")
+ (map thread-start! thlst)
+ (print "ULEX: Threads started. Joining all.")
+ (map thread-join! thlst)))))))
+
+;; below was to enable re-use of connections. This seems non-trivial so for
+;; now lets open on each call
+;;
+;; ;; given host-port get or create peer struct
+;; ;;
+;; (define (udat-get-peer uconn host-port)
+;; (or (hash-table-ref/default (udat-peers uconn) host-port #f)
+;; ;; no peer, so create pdat and init it
+;;
+;; ;; NEED stack of connections, pop and use; inp, oup,
+;; ;; creation_time (remove and create new if over 24hrs old
+;; ;;
+;; (let ((pdat (make-pdat host-port: host-port)))
+;; (hash-table-set! (udat-peers uconn) host-port pdat)
+;; pdat)))
+;;
+;; ;; is pcon alive
+;;
+;; ;; given host-port and pdat get a pcon
+;; ;;
+;; (define (pdat-get-pcon pdat host-port)
+;; (let loop ((conns (pdat-conns pdat)))
+;; (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later
+;; (init-pcon (make-pcon))
+;; (let* ((conn (pop conns)))
+;;
+;; ;; given host-port get a pcon struct
+;; ;;
+;; (define (udat-get-pcon
+
+;;======================================================================
+;; misc utils
+;;======================================================================
+
+(define (make-cookie uconn)
+ (let ((newcnum (+ (udat-cnum uconn) 1)))
+ (udat-cnum-set! uconn newcnum)
+ (conc (udat-host-port uconn) ":"
+ newcnum)))
+
+;; cookie/mboxes
+
+;; we store each mbox with a cookie ( . )
+;;
+(define (get-cmbox uconn)
+ (if (null? (udat-avail-cmboxes uconn))
+ (let ((cookie (make-cookie uconn))
+ (mbox (make-mailbox)))
+ (hash-table-set! (udat-mboxes uconn) cookie mbox)
+ `(,cookie . ,mbox))
+ (let ((cmbox (car (udat-avail-cmboxes uconn))))
+ (udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn)))
+ cmbox)))
+
+(define (put-cmbox uconn cmbox)
+ (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn))))
+
+(define (pp-uconn uconn)
+ (pp (udat->alist uconn)))
+
+
;;======================================================================
;; network utilities
;;======================================================================
+
+;; NOTE: Look at address-info egg as alternative to some of this
(define (rate-ip ipaddr)
(regex-case ipaddr
( "^127\\..*" _ 0 )
( "^(10\\.0|192\\.168)\\..*" _ 1 )
@@ -354,1899 +545,26 @@
;; Change this to bias for addresses with a reasonable broadcast value?
;;
(define (ip-pref-less? a b)
(> (rate-ip a) (rate-ip b)))
-
(define (get-my-best-address)
- (let ((all-my-addresses (get-all-ips))
- ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))
- )
+ (let ((all-my-addresses (get-all-ips)))
(cond
((null? all-my-addresses)
(get-host-name)) ;; no interfaces?
((eq? (length all-my-addresses) 1)
(car all-my-addresses)) ;; only one to choose from, just go with it
-
(else
- (car (sort all-my-addresses ip-pref-less?)))
- ;; (else
- ;; (ip->string (car (filter (lambda (x) ;; take any but 127.
- ;; (not (eq? (u8vector-ref x 0) 127)))
- ;; all-my-addresses))))
-
- )))
+ (car (sort all-my-addresses ip-pref-less?))))))
(define (get-all-ips-sorted)
(sort (get-all-ips) ip-pref-less?))
(define (get-all-ips)
- (map ip->string (vector->list
- (hostinfo-addresses
- (host-information (current-hostname))))))
-
-(define (udat-my-host-port udata)
- (if (and (udat-my-address udata)(udat-my-port udata))
- (conc (udat-my-address udata) ":" (udat-my-port udata))
- #f))
-
-(define (udat-captain-host-port udata)
- (if (and (udat-captain-address udata)(udat-captain-port udata))
- (conc (udat-captain-address udata) ":" (udat-captain-port udata))
- #f))
-
-(define (udat-get-peer udata host-port)
- (hash-table-ref/default (udat-peers udata) host-port #f))
-
-;; struct for keeping track of others we are talking to
-
-(defstruct peer
- (addr-port #f)
- (hostname #f)
- (pid #f)
- ;; (inp #f)
- ;; (oup #f)
- (dbs '()) ;; list of databases this peer is currently handling
- )
-
-(defstruct work
- (peer-dat #f)
- (handlerkey #f)
- (qrykey #f)
- (data #f)
- (start (current-milliseconds)))
-
-#;(defstruct dbowner
- (pdat #f)
- (last-update (current-seconds)))
-
-;;======================================================================
-;; Captain functions
-;;======================================================================
-
-;; NB// This needs to be started in a thread
-;;
-;; setup to be a captain
-;; - local server MUST be started already
-;; - create pkt
-;; - start server port handler
-;;
-(define (setup-as-captain udata)
- (if (create-captain-pkt udata)
- (let* ((my-addr (udat-my-address udata))
- (my-port (udat-my-port udata))
- (th (make-thread (lambda ()
- (ulex-handler-loop udata)) "Captain handler")))
- (udat-handler-thread-set! udata th)
- (udat-captain-address-set! udata my-addr)
- (udat-captain-port-set! udata my-port)
- (thread-start! th))
- (begin
- (print "ERROR: failed to create captain pkt")
- #f)))
-
-;; given a pkts dir read
-;;
-(define (get-all-captain-pkts udata)
- (let* ((pktsdir (let ((d (udat-cpkts-dir udata)))
- (if (file-exists? d)
- d
- (begin
- (create-directory d #t)
- d))))
- (all-pkt-files (glob (conc pktsdir "/*.pkt")))
- (pkt-spec (udat-cpkt-spec udata)))
- (map (lambda (pkt-file)
- (read-pkt->alist pkt-file pktspec: pkt-spec))
- all-pkt-files)))
-
-;; sort by D then Z, return one, choose the oldest then
-;; differentiate if needed using the Z key
-;;l
-(define (get-winning-pkt pkts)
- (if (null? pkts)
- #f
- (car (sort pkts (lambda (a b)
- (let ((ad (string->number (alist-ref 'D a)))
- (bd (string->number (alist-ref 'D b))))
- (if (eq? a b)
- (let ((az (alist-ref 'Z a))
- (bz (alist-ref 'Z b)))
- (string>=? az bz))
- (> ad bd))))))))
-
-;; put the host, ip, port and pid into a pkt in
-;; the captain pkts dir
-;; - assumes user has already fired up a server
-;; which will be in the udata struct
-;;
-(define (create-captain-pkt udata)
- (if (not (udat-serv-listener udata))
- (begin
- (print "ERROR: create-captain-pkt called with out a listener")
- #f)
- (let* ((pktdat `((port . ,(udat-my-port udata))
- (host . ,(udat-my-hostname udata))
- (ipaddr . ,(udat-my-address udata))
- (pid . ,(udat-my-pid udata))))
- (pktdir (udat-cpkts-dir udata))
- (pktspec (udat-cpkt-spec udata))
- )
- (udat-my-cpkt-key-set!
- udata
- (write-alist->pkt
- pktdir
- pktdat
- pktspec: pktspec
- ptype: 'captain))
- (udat-my-cpkt-key udata))))
-
-;; remove pkt associated with captn (the Z key .pkt)
-;;
-(define (remove-captain-pkt udata captn)
- (let ((Z (alist-ref 'Z captn))
- (cpktdir (udat-cpkts-dir udata)))
- (delete-file* (conc cpktdir "/" Z ".pkt"))))
-
-;; call all known peers and tell them to delete their info on the captain
-;; thus forcing them to re-read pkts and connect to a new captain
-;; call this when the captain needs to exit and if an older captain is
-;; detected. Due to delays in sending file meta data in NFS multiple
-;; captains can be initiated in a "Storm of Captains", book soon to be
-;; on Amazon
-;;
-(define (drop-captain udata)
- (let* ((peers (hash-table-keys (udat-peers udata)))
- (cookie (make-cookie udata)))
- (for-each
- (lambda (host-port)
- (send udata host-port 'dropcaptain cookie "nomsg" retval: #t))
- peers)))
-
-;;======================================================================
-;; server primitives
-;;======================================================================
-
-(define (make-cookie udata)
- (let ((newcnum (+ (udat-cnum udata) 1)))
- (udat-cnum-set! udata newcnum)
- (conc (udat-my-address udata) ":"
- (udat-my-port udata) "-"
- (udat-my-pid udata) "-"
- newcnum)))
-
-;; create a tcp listener and return a populated udat struct with
-;; my port, address, hostname, pid etc.
-;; return #f if fail to find a port to allocate.
-;;
-;; if udata-in is #f create the record
-;; if there is already a serv-listener return the udata
-;;
-(define (start-server-find-port udata-in #!optional (port 4242))
- (let ((udata (or udata-in (make-udat))))
- (if (udat-serv-listener udata) ;; TODO - add check that the listener is alive and ready?
- udata
- (handle-exceptions
- exn
- (if (< port 65535)
- (start-server-find-port udata (+ port 1))
- #f)
- (connect-server udata port)))))
-
-(define (connect-server udata port)
- ;; (tcp-listener-socket LISTENER)(socket-name so)
- ;; sockaddr-address, sockaddr-port, sockaddr->string
- (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
- (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
- (udat-my-address-set! udata addr)
- (udat-my-port-set! udata port)
- (udat-my-hostname-set! udata (get-host-name))
- (udat-serv-listener-set! udata tlsn)
- udata))
-
-(define (get-peer-dat udata host-port #!optional (hostname #f)(pid #f))
- (let* ((pdat (or (udat-get-peer udata host-port)
- (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE SPECIFIC
- exn
- #f
- (let ((npdat (make-peer addr-port: host-port)))
- (if hostname (peer-hostname-set! npdat hostname))
- (if pid (peer-pid-set! npdat pid))
- npdat)))))
- pdat))
-
-;; send structured data to recipient
-;;
-;; NOTE: qrykey is what was called the "cookie" previously
-;;
-;; retval tells send to expect and wait for return data (one line) and return it or time out
-;; this is for ping where we don't want to necessarily have set up our own server yet.
-;;
-(define (send udata host-port handler qrykey data
- #!key (hostname #f)(pid #f)(params '())(retval #f))
- (let* ((my-host-port (udat-my-host-port udata))
- (isme (equal? host-port my-host-port)) ;; am I calling
- ;; myself?
- (dat (list
- handler ;; " "
- my-host-port ;; " "
- (udat-my-pid udata) ;; " "
- qrykey
- params ;;(if (null? params) "" (conc " "
- ;;(string-intersperse params " ")))
- )))
- ;; (print "send isme is " (if isme "true!" "false!") ",
- ;; my-host-port: " my-host-port ", host-port: " host-port)
- (if isme
- (ulex-handler udata dat data)
- (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE
- ;; SPECIFIC
- exn
- #f
- (let-values (((inp oup)(tcp-connect host-port)))
- ;;
- ;; CONTROL LINE:
- ;; handlerkey host:port pid qrykey params ...
- ;;
- (let ((res
- (if (and inp oup)
- (let* ()
- (if my-host-port
- (begin
- (write dat oup)
- (write data oup) ;; send as sexpr
- ;; (print "Sent dat: " dat " data: " data)
- (if retval
- (read inp)
- #t))
- (begin
- (print "ERROR: send called but no receiver has been setup. Please call setup first!")
- #f))
- ;; NOTE: DO NOT BE TEMPTED TO LOOK AT ANY DATA ON INP HERE!
- ;; (there is a listener for handling that)
- )
- #f))) ;; #f means failed to connect and send
- (close-input-port inp)
- (close-output-port oup)
- res))))))
-
-;; send a request to the given host-port and register a mailbox in udata
-;; wait for the mailbox data and return it
-;;
-(define (send-receive udata host-port handler qrykey data #!key (hostname #f)(pid #f)(params '())(timeout 20))
- (let ((mbox (make-mailbox))
- (mbox-time (current-milliseconds))
- (mboxes (udat-mboxes udata)))
- (hash-table-set! mboxes qrykey mbox)
- (if (send udata host-port handler qrykey data hostname: hostname pid: pid params: params)
- (let* ((mbox-timeout-secs timeout)
- (mbox-timeout-result 'MBOX_TIMEOUT)
- (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
- (mbox-receive-time (current-milliseconds)))
- (hash-table-delete! mboxes qrykey)
- (if (eq? res 'MBOX_TIMEOUT)
- #f
- res))
- #f))) ;; #f means failed to communicate
-
-;;
-(define (ulex-handler udata controldat data)
- (print "controldat: " controldat " data: " data)
- (match controldat ;; (string-split controldat)
- ((handlerkey host-port pid qrykey params ...)
- ;; (print "handlerkey: " handlerkey " host-port: " host-port " pid: " pid " qrykey: " qrykey " params: " params)
- (case handlerkey ;; (string->symbol handlerkey)
- ((ack)(print "Got ack!"))
- ((ping) ;; special case - return result immediately on the same connection
- (let* ((proc (hash-table-ref/default (udat-handlers udata) 'ping #f))
- (val (if proc (proc) "gotping"))
- (peer (make-peer addr-port: host-port pid: pid))
- (dbshash (udat-dbowners udata)))
- (peer-dbs-set! peer params) ;; params for ping is list of dbs owned by pinger
- (for-each (lambda (dbfile)
- (hash-table-set! dbshash dbfile host-port)) ;; WRONG?
- params) ;; register each db in the dbshash
- (if (not (hash-table-exists? (udat-peers udata) host-port))
- (hash-table-set! (udat-peers udata) host-port peer)) ;; save the details of this caller in peers
- qrykey)) ;; End of ping
- ((goodbye)
- ;; remove all traces of the caller in db ownership etc.
- (let* ((peer (hash-table-ref/default (udat-peers udata) host-port #f))
- (dbs (if peer (peer-dbs peer) '()))
- (dbshash (udat-dbowners udata)))
- (for-each (lambda (dbfile)(hash-table-delete! dbshash dbfile)) dbs)
- (hash-table-delete! (udat-peers udata) host-port)
- qrykey))
- ((dropcaptain)
- ;; remove all traces of the captain
- (udat-captain-address-set! udata #f)
- (udat-captain-host-set! udata #f)
- (udat-captain-port-set! udata #f)
- (udat-captain-pid-set! udata #f)
- qrykey)
- ((rucaptain) ;; remote is asking if I'm the captain
- (if (udat-my-cpkt-key udata) "yes" "no"))
- ((db-owner) ;; given a db name who do I send my queries to
- ;; look up the file in handlers, if have an entry ping them to be sure
- ;; they are still alive and then return that host:port.
- ;; if no handler found or if the ping fails pick from peers the oldest that
- ;; is managing the fewest dbs
- (match params
- ((dbfile dbtype)
- (let* ((owner-host-port (hash-table-ref/default (udat-dbowners udata) dbfile #f)))
- (if owner-host-port
- (conc qrykey " " owner-host-port)
- (let* ((pdat (or (hash-table-ref/default (udat-peers udata) host-port #f) ;; no owner - caller gets to own it!
- (make-peer addr-port: host-port pid: pid dbs: `(,dbfile)))))
- (hash-table-set! (udat-peers udata) host-port pdat)
- (hash-table-set! (udat-dbowners udata) dbfile host-port)
- (conc qrykey " " host-port)))))
- (else (conc qrykey " BADDATA"))))
- ;; for work items:
- ;; handler is one of; immediate, read-only, read-write, high-priority
- ((immediate read-only normal low-priority) ;; do this work immediately
- ;; host-port (caller), pid (caller), qrykey (cookie), params <= all from first line
- ;; data => a single line encoded however you want, or should I build json into it?
- (print "handlerkey=" handlerkey)
- (let* ((pdat (get-peer-dat udata host-port)))
- (match params ;; dbfile prockey procparam
- ((dbfile prockey procparam)
- (case handlerkey
- ((immediate read-only)
- (process-request udata pdat dbfile qrykey prockey procparam data))
- ((normal low-priority) ;; split off later and add logic to support low priority
- (add-to-work-queue udata pdat dbfile qrykey prockey procparam data))
- (else
- #f)))
- (else
- (print "INFO: params=" params " handlerkey=" handlerkey " controldat=" controldat)
- #f))))
- (else
- ;; (add-to-work-queue udata (get-peer-dat udata host-port) handlerkey qrykey data)
- #f)))
- (else
- (print "BAD DATA? controldat=" controldat " data=" data)
- #f)));; handles the incoming messages and dispatches to queues
-
-;;
-(define (ulex-handler-loop udata)
- (let* ((serv-listener (udat-serv-listener udata)))
- ;; data comes as two lines
- ;; handlerkey resp-addr:resp-port hostname pid qrykey [dbpath/dbfile.db]
- ;; data
- (let loop ((state 'start))
- (let-values (((inp oup)(tcp-accept serv-listener)))
- (let* ((controldat (read inp))
- (data (read inp))
- (resp (ulex-handler udata controldat data)))
- (if resp (write resp oup))
- (close-input-port inp)
- (close-output-port oup))
- (loop state)))))
-
-;; add a proc to the handler list, these are done symetrically (i.e. in all instances)
-;; so that the proc can be dereferenced remotely
-;;
-(define (register-handler udata key proc)
- (hash-table-set! (udat-handlers udata) key proc))
-
-
-;;======================================================================
-;; work queues
-;;======================================================================
-
-(define (add-to-work-queue udata peer-dat handlerkey qrykey data)
- (let ((wdat (make-work peer-dat: peer-dat handlerkey: handlerkey qrykey: qrykey data: data)))
- (if (udat-busy udata)
- (queue-add! (udat-work-queue udata) wdat)
- (process-work udata wdat)) ;; passing in wdat tells process-work to first process the passed in wdat
- ))
-
-(define (do-work udata wdat)
- #f)
-
-(define (process-work udata #!optional wdat)
- (if wdat (do-work udata wdat)) ;; process wdat
- (let ((wqueue (udat-work-queue udata)))
- (if (not (queue-empty? wqueue))
- (let loop ((wd (queue-remove! wqueue)))
- (do-work udata wd)
- (if (not (queue-empty? wqueue))
- (loop (queue-remove! wqueue)))))))
-
-;;======================================================================
-;; Generic db handling
-;; setup a inmem db instance
-;; open connection to on-disk db
-;; sync on-disk db to inmem
-;; get lock in on-disk db for dbowner of this db
-;; put sync-proc, init-proc, on-disk handle, inmem handle in dbconn stuct
-;; return the stuct
-;;======================================================================
-
-(defstruct dbconn
- (fname #f)
- (inmem #f)
- (conn #f)
- (sync #f) ;; sync proc
- (init #f) ;; init proc
- (lastsync (current-seconds))
- )
-
-(defstruct dbinfo
- (initproc #f)
- (syncproc #f))
-
-;; open inmem and disk database
-;; init with initproc
-;; return db struct
-;;
-;; appname; megatest, ulex or something else.
-;;
-(define (setup-db-connection udata fname-in appname dbtype)
- (let* ((is-ulex (eq? appname 'ulex))
- (dbinf (if is-ulex ;; ulex is a built-in special case
- (make-dbinfo initproc: ulexdb-init syncproc: ulexdb-sync)
- (hash-table-ref/default (udat-dbtypes udata) dbtype #f)))
- (initproc (dbinfo-initproc dbinf))
- (syncproc (dbinfo-syncproc dbinf))
- (fname (if is-ulex
- (conc (udat-ulex-dir udata) "/ulex.db")
- fname-in))
- (inmem-db (open-and-initdb udata #f 'inmem (dbinfo-initproc dbinf)))
- (disk-db (open-and-initdb udata fname 'disk (dbinfo-initproc dbinf))))
- (make-dbconn inmem: inmem-db conn: disk-db sync: syncproc init: initproc)))
-
-;; dest='inmem or 'disk
-;;
-(define (open-and-initdb udata filename dest init-proc)
- (let* ((inmem (eq? dest 'inmem))
- (dbfile (if inmem
- ":INMEM:"
- filename))
- (dbexists (if inmem #t (file-exists? dbfile)))
- (db (sqlite3:open-database dbfile)))
- (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
- (if (not dbexists)
- (init-proc db))
- db))
-
-
-;;======================================================================
-;; Previous Ulex db stuff
-;;======================================================================
-
-(define (ulexdb-init db inmem)
- (sqlite3:with-transaction
- db
- (lambda ()
- (for-each
- (lambda (stmt)
- (if stmt (sqlite3:execute db stmt)))
- `("CREATE TABLE IF NOT EXISTS processes
- (id INTEGER PRIMARY KEY,
- host TEXT NOT NULL,
- ipadr TEXT NOT NULL,
- port INTEGER NOT NULL,
- pid INTEGER NOT NULL,
- regtime INTEGER DEFAULT (strftime('%s','now')),
- last_update INTEGER DEFAULT (strftime('%s','now')));"
- (if inmem
- "CREATE TRIGGER IF NOT EXISTS update_proces_trigger AFTER UPDATE ON processes
- FOR EACH ROW
- BEGIN
- UPDATE processes SET last_update=(strftime('%s','now'))
- WHERE id=old.id;
- END;"
- #f))))))
-
-;; open databases, do initial sync
-(define (ulexdb-sync dbconndat udata)
- #f)
-
-
-) ;; END OF ULEX
-
-
-;;; ;;======================================================================
-;;; ;; D E B U G H E L P E R S
-;;; ;;======================================================================
-;;;
-;;; (define (dbg> . args)
-;;; (with-output-to-port (current-error-port)
-;;; (lambda ()
-;;; (apply print "dbg> " args))))
-;;;
-;;; (define (debug-pp . args)
-;;; (if (get-environment-variable "ULEX_DEBUG")
-;;; (with-output-to-port (current-error-port)
-;;; (lambda ()
-;;; (apply pp args)))))
-;;;
-;;; (define *default-debug-port* (current-error-port))
-;;;
-;;; (define (sdbg> fn stage-name stage-start stage-end start-time . message)
-;;; (if (get-environment-variable "ULEX_DEBUG")
-;;; (with-output-to-port *default-debug-port*
-;;; (lambda ()
-;;; (apply print "ulex:" fn " " stage-name " took " (- (if stage-end stage-end (current-milliseconds)) stage-start) " ms. "
-;;; (if start-time
-;;; (conc "total time " (- (current-milliseconds) start-time)
-;;; " ms.")
-;;; "")
-;;; message
-;;; )))))
-
-;;======================================================================
-;; M A C R O S
-;;======================================================================
-;; iup callbacks are not dumping the stack, this is a work-around
-;;
-
-;; Some of these routines use:
-;;
-;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html
-;;
-;; Syntax for defining macros in a simple style similar to function definiton,
-;; when there is a single pattern for the argument list and there are no keywords.
-;;
-;; (define-simple-syntax (name arg ...) body ...)
-;;
-;;
-;; (define-syntax define-simple-syntax
-;; (syntax-rules ()
-;; ((_ (name arg ...) body ...)
-;; (define-syntax name (syntax-rules () ((name arg ...) (begin body ...)))))))
-;;
-;; (define-simple-syntax (catch-and-dump proc procname)
-;; (handle-exceptions
-;; exn
-;; (begin
-;; (print-call-chain (current-error-port))
-;; (with-output-to-port (current-error-port)
-;; (lambda ()
-;; (print ((condition-property-accessor 'exn 'message) exn))
-;; (print "Callback error in " procname)
-;; (print "Full condition info:\n" (condition->list exn)))))
-;; (proc)))
-;;
-;;
-;;======================================================================
-;; R E C O R D S
-;;======================================================================
-
-;;; ;; information about me as a server
-;;; ;;
-;;; (defstruct area
-;;; ;; about this area
-;;; (useportlogger #f)
-;;; (lowport 32768)
-;;; (server-type 'auto) ;; auto=create up to five servers/pkts, main=create pkts, passive=no pkt (unless there are no pkts at all)
-;;; (conn #f)
-;;; (port #f)
-;;; (myaddr (get-my-best-address))
-;;; pktid ;; get pkt from hosts table if needed
-;;; pktfile
-;;; pktsdir
-;;; dbdir
-;;; (dbhandles (make-hash-table)) ;; fname => list-of-dbh, NOTE: Should really never need more than one?
-;;; (mutex (make-mutex))
-;;; (rtable (make-hash-table)) ;; registration table of available actions
-;;; (dbs (make-hash-table)) ;; filename => random number, used for choosing what dbs I serve
-;;; ;; about other servers
-;;; (hosts (make-hash-table)) ;; key => hostdat
-;;; (hoststats (make-hash-table)) ;; key => alist of fname => ( qcount . qtime )
-;;; (reqs (make-hash-table)) ;; uri => queue
-;;; ;; work queues
-;;; (wqueues (make-hash-table)) ;; fname => qdat
-;;; (stats (make-hash-table)) ;; fname => totalqueries
-;;; (last-srvup (current-seconds)) ;; last time we updated the known servers
-;;; (cookie2mbox (make-hash-table)) ;; map cookie for outstanding request to mailbox of awaiting call
-;;; (ready #f)
-;;; (health (make-hash-table)) ;; ipaddr:port => num failed pings since last good ping
-;;; )
-;;;
-;;; ;; host stats
-;;; ;;
-;;; (defstruct hostdat
-;;; (pkt #f)
-;;; (dbload (make-hash-table)) ;; "dbfile.db" => queries/min
-;;; (hostload #f) ;; normalized load ( 5min load / numcpus )
-;;; )
-;;;
-;;; ;; dbdat
-;;; ;;
-;;; (defstruct dbdat
-;;; (dbh #f)
-;;; (fname #f)
-;;; (write-access #f)
-;;; (sths (make-hash-table)) ;; hash mapping query strings to handles
-;;; )
-;;;
-;;; ;; qdat
-;;; ;;
-;;; (defstruct qdat
-;;; (writeq (make-queue))
-;;; (readq (make-queue))
-;;; (rwq (make-queue))
-;;; (logq (make-queue)) ;; do we need a queue for logging? yes, if we use sqlite3 db for logging
-;;; (osshort (make-queue))
-;;; (oslong (make-queue))
-;;; (misc (make-queue)) ;; used for things like ping-full
-;;; )
-;;;
-;;; ;; calldat
-;;; ;;
-;;; (defstruct calldat
-;;; (ctype 'dbwrite)
-;;; (obj #f) ;; this would normally be an SQL statement e.g. SELECT, INSERT etc.
-;;; (rtime (current-milliseconds)))
-;;;
-;;; ;; make it a global? Well, it is local to area module
-;;;
-;;; (define *pktspec*
-;;; `((server (hostname . h)
-;;; (port . p)
-;;; (pid . i)
-;;; (ipaddr . a)
-;;; )
-;;; (data (hostname . h) ;; sender hostname
-;;; (port . p) ;; sender port
-;;; (ipaddr . a) ;; sender ip
-;;; (hostkey . k) ;; sending host key - store info at server under this key
-;;; (servkey . s) ;; server key - this needs to match at server end or reject the msg
-;;; (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json
-;;; (data . d) ;; base64 encoded slln data
-;;; )))
-;;;
-;;; ;; work item
-;;; ;;
-;;; (defstruct witem
-;;; (rhost #f) ;; return host
-;;; (ripaddr #f) ;; return ipaddr
-;;; (rport #f) ;; return port
-;;; (servkey #f) ;; the packet representing the client of this workitem, used by final send-message
-;;; (rdat #f) ;; the request - usually an sql query, type is rdat
-;;; (action #f) ;; the action: immediate, dbwrite, dbread,oslong, osshort
-;;; (cookie #f) ;; cookie id for response
-;;; (data #f) ;; the data payload, i.e. parameters
-;;; (result #f) ;; the result from processing the data
-;;; (caller #f)) ;; the calling peer according to rpc itself
-;;;
-;;; (define (trim-pktid pktid)
-;;; (if (string? pktid)
-;;; (substring pktid 0 4)
-;;; "nopkt"))
-;;;
-;;; (define (any->number num)
-;;; (cond
-;;; ((number? num) num)
-;;; ((string? num) (string->number num))
-;;; (else num)))
-;;;
-;;; (use trace)
-;;; (trace-call-sites #t)
-;;;
-;;; ;;======================================================================
-;;; ;; D A T A B A S E H A N D L I N G
-;;; ;;======================================================================
-;;;
-;;; ;; look in dbhandles for a db, return it, else return #f
-;;; ;;
-;;; (define (get-dbh acfg fname)
-;;; (let ((dbh-lst (hash-table-ref/default (area-dbhandles acfg) fname '())))
-;;; (if (null? dbh-lst)
-;;; (begin
-;;; ;; (print "opening db for " fname)
-;;; (open-db acfg fname)) ;; Note that the handles get put back in the queue in the save-dbh calls
-;;; (let ((rem-lst (cdr dbh-lst)))
-;;; ;; (print "re-using saved connection for " fname)
-;;; (hash-table-set! (area-dbhandles acfg) fname rem-lst)
-;;; (car dbh-lst)))))
-;;;
-;;; (define (save-dbh acfg fname dbdat)
-;;; ;; (print "saving dbh for " fname)
-;;; (hash-table-set! (area-dbhandles acfg) fname (cons dbdat (hash-table-ref/default (area-dbhandles acfg) fname '()))))
-;;;
-;;; ;; open the database, if never before opened init it. put the handle in the
-;;; ;; open db's hash table
-;;; ;; returns: the dbdat
-;;; ;;
-;;; (define (open-db acfg fname)
-;;; (let* ((fullname (conc (area-dbdir acfg) "/" fname))
-;;; (exists (file-exists? fullname))
-;;; (write-access (if exists
-;;; (file-write-access? fullname)
-;;; (file-write-access? (area-dbdir acfg))))
-;;; (db (sqlite3:open-database fullname))
-;;; (handler (sqlite3:make-busy-timeout 136000))
-;;; )
-;;; (sqlite3:set-busy-handler! db handler)
-;;; (sqlite3:execute db "PRAGMA synchronous = 0;")
-;;; (if (not exists) ;; need to init the db
-;;; (if write-access
-;;; (let ((isql (get-rsql acfg 'dbinitsql))) ;; get the init sql statements
-;;; ;; (sqlite3:with-transaction
-;;; ;; db
-;;; ;; (lambda ()
-;;; (if isql
-;;; (for-each
-;;; (lambda (sql)
-;;; (sqlite3:execute db sql))
-;;; isql)))
-;;; (print "ERROR: no write access to " (area-dbdir acfg))))
-;;; (make-dbdat dbh: db fname: fname write-access: write-access)))
-;;;
-;;; ;; This is a low-level command to retrieve or to prepare, save and return a prepared statment
-;;; ;; you must extract the db handle
-;;; ;;
-;;; (define (get-sth db cache stmt)
-;;; (if (hash-table-exists? cache stmt)
-;;; (begin
-;;; ;; (print "Reusing cached stmt for " stmt)
-;;; (hash-table-ref/default cache stmt #f))
-;;; (let ((sth (sqlite3:prepare db stmt)))
-;;; (hash-table-set! cache stmt sth)
-;;; ;; (print "prepared stmt for " stmt)
-;;; sth)))
-;;;
-;;; ;; a little more expensive but does all the tedious deferencing - only use if you don't already
-;;; ;; have dbdat and db sitting around
-;;; ;;
-;;; (define (full-get-sth acfg fname stmt)
-;;; (let* ((dbdat (get-dbh acfg fname))
-;;; (db (dbdat-dbh dbdat))
-;;; (sths (dbdat-sths dbdat)))
-;;; (get-sth db sths stmt)))
-;;;
-;;; ;; write to a db
-;;; ;; acfg: area data
-;;; ;; rdat: request data
-;;; ;; hdat: (host . port)
-;;; ;;
-;;; ;; (define (dbwrite acfg rdat hdat data-in)
-;;; ;; (let* ((dbname (car data-in))
-;;; ;; (dbdat (get-dbh acfg dbname))
-;;; ;; (db (dbdat-dbh dbdat))
-;;; ;; (sths (dbdat-sths dbdat))
-;;; ;; (stmt (calldat-obj rdat))
-;;; ;; (sth (get-sth db sths stmt))
-;;; ;; (data (cdr data-in)))
-;;; ;; (print "dbname: " dbname " acfg: " acfg " rdat: " (calldat->alist rdat) " hdat: " hdat " data: " data)
-;;; ;; (print "dbdat: " (dbdat->alist dbdat))
-;;; ;; (apply sqlite3:execute sth data)
-;;; ;; (save-dbh acfg dbname dbdat)
-;;; ;; #t
-;;; ;; ))
-;;;
-;;; (define (finalize-all-db-handles acfg)
-;;; (let* ((dbhandles (area-dbhandles acfg)) ;; dbhandles is hash of fname ==> dbdat
-;;; (num 0))
-;;; (for-each
-;;; (lambda (area-name)
-;;; (print "Closing handles for " area-name)
-;;; (let ((dbdats (hash-table-ref/default dbhandles area-name '())))
-;;; (for-each
-;;; (lambda (dbdat)
-;;; ;; first close all statement handles
-;;; (for-each
-;;; (lambda (sth)
-;;; (sqlite3:finalize! sth)
-;;; (set! num (+ num 1)))
-;;; (hash-table-values (dbdat-sths dbdat)))
-;;; ;; now close the dbh
-;;; (set! num (+ num 1))
-;;; (sqlite3:finalize! (dbdat-dbh dbdat)))
-;;; dbdats)))
-;;; (hash-table-keys dbhandles))
-;;; (print "FINALIZED " num " dbhandles")))
-;;;
-;;; ;;======================================================================
-;;; ;; W O R K Q U E U E H A N D L I N G
-;;; ;;======================================================================
-;;;
-;;; (define (register-db-as-mine acfg dbname)
-;;; (let ((ht (area-dbs acfg)))
-;;; (if (not (hash-table-ref/default ht dbname #f))
-;;; (hash-table-set! ht dbname (random 10000)))))
-;;;
-;;; (define (work-queue-add acfg fname witem)
-;;; (let* ((work-queue-start (current-milliseconds))
-;;; (action (witem-action witem)) ;; NB the action is the index into the rdat actions
-;;; (qdat (or (hash-table-ref/default (area-wqueues acfg) fname #f)
-;;; (let ((newqdat (make-qdat)))
-;;; (hash-table-set! (area-wqueues acfg) fname newqdat)
-;;; newqdat)))
-;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f)))
-;;; (if rdat
-;;; (queue-add!
-;;; (case (calldat-ctype rdat)
-;;; ((dbwrite) (register-db-as-mine acfg fname)(qdat-writeq qdat))
-;;; ((dbread) (register-db-as-mine acfg fname)(qdat-readq qdat))
-;;; ((dbrw) (register-db-as-mine acfg fname)(qdat-rwq qdat))
-;;; ((oslong) (qdat-oslong qdat))
-;;; ((osshort) (qdat-osshort qdat))
-;;; ((full-ping) (qdat-misc qdat))
-;;; (else
-;;; (print "ERROR: no queue for " action ". Adding to dbwrite queue.")
-;;; (qdat-writeq qdat)))
-;;; witem)
-;;; (case action
-;;; ((full-ping)(qdat-misc qdat))
-;;; (else
-;;; (print "ERROR: No action " action " was registered"))))
-;;; (sdbg> "work-queue-add" "queue-add" work-queue-start #f #f)
-;;; #t)) ;; for now, simply return #t to indicate request got to the queue
-;;;
-;;; (define (doqueue acfg q fname dbdat dbh)
-;;; ;; (print "doqueue: " fname)
-;;; (let* ((start-time (current-milliseconds))
-;;; (qlen (queue-length q)))
-;;; (if (> qlen 1)
-;;; (print "Processing queue of length " qlen))
-;;; (let loop ((count 0)
-;;; (responses '()))
-;;; (let ((delta (- (current-milliseconds) start-time)))
-;;; (if (or (queue-empty? q)
-;;; (> delta 400)) ;; stop working on this queue after 400ms have passed
-;;; (list count delta responses) ;; return count, delta and responses list
-;;; (let* ((witem (queue-remove! q))
-;;; (action (witem-action witem))
-;;; (rdat (witem-rdat witem))
-;;; (stmt (calldat-obj rdat))
-;;; (sth (full-get-sth acfg fname stmt))
-;;; (ctype (calldat-ctype rdat))
-;;; (data (witem-data witem))
-;;; (cookie (witem-cookie witem)))
-;;; ;; do the processing and save the result in witem-result
-;;; (witem-result-set!
-;;; witem
-;;; (case ctype ;; action
-;;; ((noblockwrite) ;; blind write, no ack of success returned
-;;; (apply sqlite3:execute sth data)
-;;; (sqlite3:last-insert-rowid dbh))
-;;; ((dbwrite) ;; blocking write
-;;; (apply sqlite3:execute sth data)
-;;; #t)
-;;; ((dbread) ;; TODO: consider breaking this up and shipping in pieces for large query
-;;; (apply sqlite3:map-row (lambda x x) sth data))
-;;; ((full-ping) 'full-ping)
-;;; (else (print "Not ready for action " action) #f)))
-;;; (loop (add1 count)
-;;; (if cookie
-;;; (cons witem responses)
-;;; responses))))))))
-;;;
-;;; ;; do up to 400ms of processing on each queue
-;;; ;; - the work-queue-processor will allow the max 1200ms of work to complete but it will flag as overloaded
-;;; ;;
-;;; (define (process-db-queries acfg fname)
-;;; (if (hash-table-exists? (area-wqueues acfg) fname)
-;;; (let* ((process-db-queries-start-time (current-milliseconds))
-;;; (qdat (hash-table-ref/default (area-wqueues acfg) fname #f))
-;;; (queue-sym->queue (lambda (queue-sym)
-;;; (case queue-sym ;; lookup the queue from qdat given a name (symbol)
-;;; ((wqueue) (qdat-writeq qdat))
-;;; ((rqueue) (qdat-readq qdat))
-;;; ((rwqueue) (qdat-rwq qdat))
-;;; ((misc) (qdat-misc qdat))
-;;; (else #f))))
-;;; (dbdat (get-dbh acfg fname))
-;;; (dbh (if (dbdat? dbdat)(dbdat-dbh dbdat) #f))
-;;; (nowtime (current-seconds)))
-;;; ;; handle the queues that require a transaction
-;;; ;;
-;;; (map ;;
-;;; (lambda (queue-sym)
-;;; ;; (print "processing queue " queue-sym)
-;;; (let* ((queue (queue-sym->queue queue-sym)))
-;;; (if (not (queue-empty? queue))
-;;; (let ((responses
-;;; (sqlite3:with-transaction ;; todo - catch exceptions...
-;;; dbh
-;;; (lambda ()
-;;; (let* ((res (doqueue acfg queue fname dbdat dbh))) ;; this does the work!
-;;; ;; (print "res=" res)
-;;; (match res
-;;; ((count delta responses)
-;;; (update-stats acfg fname queue-sym delta count)
-;;; (sdbg> "process-db-queries" "sqlite3-transaction" process-db-queries-start-time #f #f)
-;;; responses) ;; return responses
-;;; (else
-;;; (print "ERROR: bad return data from doqueue " res)))
-;;; )))))
-;;; ;; having completed the transaction, send the responses.
-;;; ;; (print "INFO: sending " (length responses) " responses.")
-;;; (let loop ((responses-left responses))
-;;; (cond
-;;; ((null? responses-left) #t)
-;;; (else
-;;; (let* ((witem (car responses-left))
-;;; (response (cdr responses-left)))
-;;; (call-deliver-response acfg (witem-ripaddr witem)(witem-rport witem)
-;;; (witem-cookie witem)(witem-result witem)))
-;;; (loop (cdr responses-left))))))
-;;; )))
-;;; '(wqueue rwqueue rqueue))
-;;;
-;;; ;; handle misc queue
-;;; ;;
-;;; ;; (print "processing misc queue")
-;;; (let ((queue (queue-sym->queue 'misc)))
-;;; (doqueue acfg queue fname dbdat dbh))
-;;; ;; ....
-;;; (save-dbh acfg fname dbdat)
-;;; #t ;; just to let the tests know we got here
-;;; )
-;;; #f ;; nothing processed
-;;; ))
-;;;
-;;; ;; run all queues in parallel per db but sequentially per queue for that db.
-;;; ;; - process the queues every 500 or so ms
-;;; ;; - allow for long running queries to continue but all other activities for that
-;;; ;; db will be blocked.
-;;; ;;
-;;; (define (work-queue-processor acfg)
-;;; (let* ((threads (make-hash-table))) ;; fname => thread
-;;; (let loop ((fnames (hash-table-keys (area-wqueues acfg)))
-;;; (target-time (+ (current-milliseconds) 50)))
-;;; ;;(if (not (null? fnames))(print "Processing for these databases: " fnames))
-;;; (for-each
-;;; (lambda (fname)
-;;; ;; (print "processing for " fname)
-;;; ;;(process-db-queries acfg fname))
-;;; (let ((th (hash-table-ref/default threads fname #f)))
-;;; (if (and th (not (member (thread-state th) '(dead terminated))))
-;;; (begin
-;;; (print "WARNING: worker thread for " fname " is taking a long time.")
-;;; (print "Thread is in state " (thread-state th)))
-;;; (let ((th1 (make-thread (lambda ()
-;;; ;; (catch-and-dump
-;;; ;; (lambda ()
-;;; ;; (print "Process queries for " fname)
-;;; (let ((start-time (current-milliseconds)))
-;;; (process-db-queries acfg fname)
-;;; ;; (thread-sleep! 0.01) ;; need the thread to take at least some time
-;;; (hash-table-delete! threads fname)) ;; no mutexes?
-;;; fname)
-;;; "th1"))) ;; ))
-;;; (hash-table-set! threads fname th1)
-;;; (thread-start! th1)))))
-;;; fnames)
-;;; ;; (thread-sleep! 0.1) ;; give the threads some time to process requests
-;;; ;; burn time until 400ms is up
-;;; (let ((now-time (current-milliseconds)))
-;;; (if (< now-time target-time)
-;;; (let ((delta (- target-time now-time)))
-;;; (thread-sleep! (/ delta 1000)))))
-;;; (loop (hash-table-keys (area-wqueues acfg))
-;;; (+ (current-milliseconds) 50)))))
-;;;
-;;; ;;======================================================================
-;;; ;; S T A T S G A T H E R I N G
-;;; ;;======================================================================
-;;;
-;;; (defstruct stat
-;;; (qcount-avg 0) ;; coarse running average
-;;; (qtime-avg 0) ;; coarse running average
-;;; (qcount 0) ;; total
-;;; (qtime 0) ;; total
-;;; (last-qcount 0) ;; last
-;;; (last-qtime 0) ;; last
-;;; (dbs '()) ;; list of db files handled by this node
-;;; (when 0)) ;; when the last query happened - seconds
-;;;
-;;;
-;;; (define (update-stats acfg fname bucket duration numqueries)
-;;; (let* ((key fname) ;; for now do not use bucket. Was: (conc fname "-" bucket)) ;; lazy but good enough
-;;; (stats (or (hash-table-ref/default (area-stats acfg) key #f)
-;;; (let ((newstats (make-stat)))
-;;; (hash-table-set! (area-stats acfg) key newstats)
-;;; newstats))))
-;;; ;; when the last query happended (used to remove the fname from the active list)
-;;; (stat-when-set! stats (current-seconds))
-;;; ;; last values
-;;; (stat-last-qcount-set! stats numqueries)
-;;; (stat-last-qtime-set! stats duration)
-;;; ;; total over process lifetime
-;;; (stat-qcount-set! stats (+ (stat-qcount stats) numqueries))
-;;; (stat-qtime-set! stats (+ (stat-qtime stats) duration))
-;;; ;; coarse average
-;;; (stat-qcount-avg-set! stats (/ (+ (stat-qcount-avg stats) numqueries) 2))
-;;; (stat-qtime-avg-set! stats (/ (+ (stat-qtime-avg stats) duration) 2))
-;;;
-;;; ;; here is where we add the stats for a given dbfile
-;;; (if (not (member fname (stat-dbs stats)))
-;;; (stat-dbs-set! stats (cons fname (stat-dbs stats))))
-;;;
-;;; ))
-;;;
-;;; ;;======================================================================
-;;; ;; S E R V E R S T U F F
-;;; ;;======================================================================
-;;;
-;;; ;; this does NOT return!
-;;; ;;
-;;; (define (find-free-port-and-open acfg)
-;;; (let ((port (or (area-port acfg) 3200)))
-;;; (handle-exceptions
-;;; exn
-;;; (begin
-;;; (print "INFO: cannot bind to port " (rpc:default-server-port) ", trying next port")
-;;; (area-port-set! acfg (+ port 1))
-;;; (find-free-port-and-open acfg))
-;;; (rpc:default-server-port port)
-;;; (area-port-set! acfg port)
-;;; (tcp-read-timeout 120000)
-;;; ;; ((rpc:make-server (tcp-listen port)) #t)
-;;; (tcp-listen (rpc:default-server-port)
-;;; ))))
-;;;
-;;; ;; register this node by putting a packet into the pkts dir.
-;;; ;; look for other servers
-;;; ;; contact other servers and compile list of servers
-;;; ;; there are two types of server
-;;; ;; main servers - dashboards, runners and dedicated servers - need pkt
-;;; ;; passive servers - test executers, step calls, list-runs - no pkt
-;;; ;;
-;;; (define (register-node acfg hostip port-num)
-;;; ;;(mutex-lock! (area-mutex acfg))
-;;; (let* ((server-type (area-server-type acfg)) ;; auto, main, passive (no pkt created)
-;;; (best-ip (or hostip (get-my-best-address)))
-;;; (mtdir (area-dbdir acfg))
-;;; (pktdir (area-pktsdir acfg))) ;; conc mtdir "/.server-pkts")))
-;;; (print "Registering node " best-ip ":" port-num)
-;;; (if (not mtdir) ;; require a home for this node to put or find databases
-;;; #f
-;;; (begin
-;;; (if (not (directory? pktdir))(create-directory pktdir))
-;;; ;; server is started, now create pkt if needed
-;;; (print "Starting server in " server-type " mode with port " port-num)
-;;; (if (member server-type '(auto main)) ;; TODO: if auto, count number of servers registers, if > 3 then don't put out a pkt
-;;; (begin
-;;; (area-pktid-set! acfg
-;;; (write-alist->pkt
-;;; pktdir
-;;; `((hostname . ,(get-host-name))
-;;; (ipaddr . ,best-ip)
-;;; (port . ,port-num)
-;;; (pid . ,(current-process-id)))
-;;; pktspec: *pktspec*
-;;; ptype: 'server))
-;;; (area-pktfile-set! acfg (conc pktdir "/" (area-pktid acfg) ".pkt"))))
-;;; (area-port-set! acfg port-num)
-;;; #;(mutex-unlock! (area-mutex acfg))))))
-;;;
-;;; (define *cookie-seqnum* 0)
-;;; (define (make-cookie key)
-;;; (set! *cookie-seqnum* (add1 *cookie-seqnum*))
-;;; ;;(print "MAKE COOKIE CALLED -- on "servkey"-"*cookie-seqnum*)
-;;; (conc key "-" *cookie-seqnum*)
-;;; )
-;;;
-;;; ;; dispatch locally if possible
-;;; ;;
-;;; (define (call-deliver-response acfg ipaddr port cookie data)
-;;; (if (and (equal? (area-myaddr acfg) ipaddr)
-;;; (equal? (area-port acfg) port))
-;;; (deliver-response acfg cookie data)
-;;; ((rpc:procedure 'response ipaddr port) cookie data)))
-;;;
-;;; (define (deliver-response acfg cookie data)
-;;; (let ((deliver-response-start (current-milliseconds)))
-;;; (thread-start! (make-thread
-;;; (lambda ()
-;;; (let loop ((tries-left 5))
-;;; ;;(print "TOP OF DELIVER_RESPONSE LOOP; triesleft="tries-left)
-;;; ;;(pp (hash-table->alist (area-cookie2mbox acfg)))
-;;; (let* ((mbox (hash-table-ref/default (area-cookie2mbox acfg) cookie #f)))
-;;; (cond
-;;; ((eq? 0 tries-left)
-;;; (print "ulex:deliver-response: I give up. Mailbox never appeared. cookie="cookie)
-;;; )
-;;; (mbox
-;;; ;;(print "got mbox="mbox" got data="data" send.")
-;;; (mailbox-send! mbox data))
-;;; (else
-;;; ;;(print "no mbox yet. look for "cookie)
-;;; (thread-sleep! (/ (- 6 tries-left) 10))
-;;; (loop (sub1 tries-left))))))
-;;; ;; (debug-pp (list (conc "ulex:deliver-response took " (- (current-milliseconds) deliver-response-start) " ms, cookie=" cookie " data=") data))
-;;; (sdbg> "deliver-response" "mailbox-send" deliver-response-start #f #f cookie)
-;;; )
-;;; (conc "deliver-response thread for cookie="cookie))))
-;;; #t)
-;;;
-;;; ;; action:
-;;; ;; immediate - quick actions, no need to put in queues
-;;; ;; dbwrite - put in dbwrite queue
-;;; ;; dbread - put in dbread queue
-;;; ;; oslong - os actions, e.g. du, that could take a long time
-;;; ;; osshort - os actions that should be quick, e.g. df
-;;; ;;
-;;; (define (request acfg from-ipaddr from-port servkey action cookie fname params) ;; std-peer-handler
-;;; ;; NOTE: Use rpc:current-peer for getting return address
-;;; (let* ((std-peer-handler-start (current-milliseconds))
-;;; ;; (raw-data (alist-ref 'data dat))
-;;; (rdat (hash-table-ref/default
-;;; (area-rtable acfg) action #f)) ;; this looks up the sql query or other details indexed by the action
-;;; (witem (make-witem ripaddr: from-ipaddr ;; rhost: from-host
-;;; rport: from-port action: action
-;;; rdat: rdat cookie: cookie
-;;; servkey: servkey data: params ;; TODO - rename data to params
-;;; caller: (rpc:current-peer))))
-;;; (if (not (equal? servkey (area-pktid acfg)))
-;;; `(#f . ,(conc "I don't know you servkey=" servkey ", pktid=" (area-pktid acfg))) ;; immediately return this
-;;; (let* ((ctype (if rdat
-;;; (calldat-ctype rdat) ;; is this necessary? these should be identical
-;;; action)))
-;;; (sdbg> "std-peer-handler" "immediate" std-peer-handler-start #f #f)
-;;; (case ctype
-;;; ;; (dbwrite acfg rdat (cons from-ipaddr from-port) data)))
-;;; ((full-ping) `(#t "ack to full ping" ,(work-queue-add acfg fname witem) ,cookie))
-;;; ((response) `(#t "ack from requestor" ,(deliver-response acfg fname params)))
-;;; ((dbwrite) `(#t "db write submitted" ,(work-queue-add acfg fname witem) ,cookie))
-;;; ((dbread) `(#t "db read submitted" ,(work-queue-add acfg fname witem) ,cookie ))
-;;; ((dbrw) `(#t "db read/write submitted" ,cookie))
-;;; ((osshort) `(#t "os short submitted" ,cookie))
-;;; ((oslong) `(#t "os long submitted" ,cookie))
-;;; (else `(#f "unrecognised action" ,ctype)))))))
-;;;
-;;; ;; Call this to start the actual server
-;;; ;;
-;;; ;; start_server
-;;; ;;
-;;; ;; mode: '
-;;; ;; handler: proc which takes pktrecieved as argument
-;;; ;;
-;;;
-;;; (define (start-server acfg)
-;;; (let* ((conn (find-free-port-and-open acfg))
-;;; (port (area-port acfg)))
-;;; (rpc:publish-procedure!
-;;; 'delist-db
-;;; (lambda (fname)
-;;; (hash-table-delete! (area-dbs acfg) fname)))
-;;; (rpc:publish-procedure!
-;;; 'calling-addr
-;;; (lambda ()
-;;; (rpc:current-peer)))
-;;; (rpc:publish-procedure!
-;;; 'ping
-;;; (lambda ()(real-ping acfg)))
-;;; (rpc:publish-procedure!
-;;; 'request
-;;; (lambda (from-addr from-port servkey action cookie dbname params)
-;;; (request acfg from-addr from-port servkey action cookie dbname params)))
-;;; (rpc:publish-procedure!
-;;; 'response
-;;; (lambda (cookie res-dat)
-;;; (deliver-response acfg cookie res-dat)))
-;;; (area-ready-set! acfg #t)
-;;; (area-conn-set! acfg conn)
-;;; ((rpc:make-server conn) #f)));; ((tcp-listen (rpc:default-server-port)) #t)
-;;;
-;;;
-;;; (define (launch acfg) ;; #!optional (proc std-peer-handler))
-;;; (print "starting launch")
-;;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers)
-;;; #;(let ((original-handler (current-exception-handler))) ;; is th
-;;; (lambda (exception)
-;;; (server-exit-procedure)
-;;; (original-handler exception)))
-;;; (on-exit (lambda ()
-;;; (shutdown acfg))) ;; (finalize-all-db-handles acfg)))
-;;; ;; set up the rpc handler
-;;; (let* ((th1 (make-thread
-;;; (lambda ()(start-server acfg))
-;;; "server thread"))
-;;; (th2 (make-thread
-;;; (lambda ()
-;;; (print "th2 starting")
-;;; (let loop ()
-;;; (work-queue-processor acfg)
-;;; (print "work-queue-processor crashed!")
-;;; (loop)))
-;;; "work queue thread")))
-;;; (thread-start! th1)
-;;; (thread-start! th2)
-;;; (let loop ()
-;;; (thread-sleep! 0.025)
-;;; (if (area-ready acfg)
-;;; #t
-;;; (loop)))
-;;; ;; attempt to fix my address
-;;; (let* ((all-addr (get-all-ips-sorted))) ;; could use (tcp-addresses conn)?
-;;; (let loop ((rem-addrs all-addr))
-;;; (if (null? rem-addrs)
-;;; (begin
-;;; (print "ERROR: Failed to figure out the ip address of myself as a server. Giving up.")
-;;; (exit 1)) ;; BUG Changeme to raising an exception
-;;;
-;;; (let* ((addr (car rem-addrs))
-;;; (good-addr (handle-exceptions
-;;; exn
-;;; #f
-;;; ((rpc:procedure 'calling-addr addr (area-port acfg))))))
-;;; (if good-addr
-;;; (begin
-;;; (print "Got good-addr of " good-addr)
-;;; (area-myaddr-set! acfg good-addr))
-;;; (loop (cdr rem-addrs)))))))
-;;; (register-node acfg (area-myaddr acfg)(area-port acfg))
-;;; (print "INFO: Server started on " (area-myaddr acfg) ":" (area-port acfg))
-;;; ;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers)
-;;; ))
-;;;
-;;; (define (clear-server-pkt acfg)
-;;; (let ((pktf (area-pktfile acfg)))
-;;; (if pktf (delete-file* pktf))))
-;;;
-;;; (define (shutdown acfg)
-;;; (let (;;(conn (area-conn acfg))
-;;; (pktf (area-pktfile acfg))
-;;; (port (area-port acfg)))
-;;; (if pktf (delete-file* pktf))
-;;; (send-all "imshuttingdown")
-;;; ;; (rpc:close-all-connections!) ;; don't know if this is actually needed
-;;; (finalize-all-db-handles acfg)))
-;;;
-;;; (define (send-all msg)
-;;; #f)
-;;;
-;;; ;; given a area record look up all the packets
-;;; ;;
-;;; (define (get-all-server-pkts acfg)
-;;; (let ((all-pkt-files (glob (conc (area-pktsdir acfg) "/*.pkt"))))
-;;; (map (lambda (pkt-file)
-;;; (read-pkt->alist pkt-file pktspec: *pktspec*))
-;;; all-pkt-files)))
-;;;
-;;; #;((Z . "9a0212302295a19610d5796fce0370fa130758e9")
-;;; (port . "34827")
-;;; (pid . "28748")
-;;; (hostname . "zeus")
-;;; (T . "server")
-;;; (D . "1549427032.0"))
-;;;
-;;; #;(define (get-my-best-address)
-;;; (let ((all-my-addresses (get-all-ips))) ;; (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))))
-;;; (cond
-;;; ((null? all-my-addresses)
-;;; (get-host-name)) ;; no interfaces?
-;;; ((eq? (length all-my-addresses) 1)
-;;; (ip->string (car all-my-addresses))) ;; only one to choose from, just go with it
-;;; (else
-;;; (ip->string (car (filter (lambda (x) ;; take any but 127.
-;;; (not (eq? (u8vector-ref x 0) 127)))
-;;; all-my-addresses)))))))
-;;;
-;;; ;; whoami? I am my pkt
-;;; ;;
-;;; (define (whoami? acfg)
-;;; (hash-table-ref/default (area-hosts acfg)(area-pktid acfg) #f))
-;;;
-;;; ;;======================================================================
-;;; ;; "Client side" operations
-;;; ;;======================================================================
-;;;
-;;; (define (safe-call call-key host port . params)
-;;; (handle-exceptions
-;;; exn
-;;; (begin
-;;; (print "Call " call-key " to " host ":" port " failed")
-;;; #f)
-;;; (apply (rpc:procedure call-key host port) params)))
-;;;
-;;; ;; ;; convert to/from string / sexpr
-;;; ;;
-;;; ;; (define (string->sexpr str)
-;;; ;; (if (string? str)
-;;; ;; (with-input-from-string str read)
-;;; ;; str))
-;;; ;;
-;;; ;; (define (sexpr->string s)
-;;; ;; (with-output-to-string (lambda ()(write s))))
-;;;
-;;; ;; is the server alive?
-;;; ;;
-;;; (define (ping acfg host port)
-;;; (let* ((myaddr (area-myaddr acfg))
-;;; (myport (area-port acfg))
-;;; (start-time (current-milliseconds))
-;;; (res (if (and (equal? myaddr host)
-;;; (equal? myport port))
-;;; (real-ping acfg)
-;;; ((rpc:procedure 'ping host port)))))
-;;; (cons (- (current-milliseconds) start-time)
-;;; res)))
-;;;
-;;; ;; returns ( ipaddr port alist-fname=>randnum )
-;;; (define (real-ping acfg)
-;;; `(,(area-myaddr acfg) ,(area-port acfg) ,(get-host-stats acfg)))
-;;;
-;;; ;; is the server alive AND the queues processing?
-;;; ;;
-;;; #;(define (full-ping acfg servpkt)
-;;; (let* ((start-time (current-milliseconds))
-;;; (res (send-message acfg servpkt '(full-ping) 'full-ping)))
-;;; (cons (- (current-milliseconds) start-time)
-;;; res))) ;; (equal? res "got ping"))))
-;;;
-;;;
-;;; ;; look up all pkts and get the server id (the hash), port, host/ip
-;;; ;; store this info in acfg
-;;; ;; return the number of responsive servers found
-;;; ;;
-;;; ;; DO NOT VERIFY THAT THE SERVER IS ALIVE HERE. This is called at times where the current server is not yet alive and cannot ping itself
-;;; ;;
-;;; (define (update-known-servers acfg)
-;;; ;; readll all pkts
-;;; ;; foreach pkt; if it isn't me ping the server; if alive, add to hosts hash, else rm the pkt
-;;; (let* ((start-time (current-milliseconds))
-;;; (all-pkts (delete-duplicates
-;;; (append (get-all-server-pkts acfg)
-;;; (hash-table-values (area-hosts acfg)))))
-;;; (hostshash (area-hosts acfg))
-;;; (my-id (area-pktid acfg))
-;;; (pktsdir (area-pktsdir acfg)) ;; needed to remove pkts from non-responsive servers
-;;; (numsrvs 0)
-;;; (delpkt (lambda (pktsdir sid)
-;;; (print "clearing out server " sid)
-;;; (delete-file* (conc pktsdir "/" sid ".pkt"))
-;;; (hash-table-delete! hostshash sid))))
-;;; (area-last-srvup-set! acfg (current-seconds))
-;;; (for-each
-;;; (lambda (servpkt)
-;;; (if (list? servpkt)
-;;; ;; (pp servpkt)
-;;; (let* ((shost (alist-ref 'ipaddr servpkt))
-;;; (sport (any->number (alist-ref 'port servpkt)))
-;;; (res (handle-exceptions
-;;; exn
-;;; (begin
-;;; ;; (print "INFO: bad server on " shost ":" sport)
-;;; #f)
-;;; (ping acfg shost sport)))
-;;; (sid (alist-ref 'Z servpkt)) ;; Z code is our name for the server
-;;; (url (conc shost ":" sport))
-;;; )
-;;; #;(if (or (not res)
-;;; (null? res))
-;;; (begin
-;;; (print "STRANGE: ping of " url " gave " res)))
-;;;
-;;; ;; (print "Got " res " from " shost ":" sport)
-;;; (match res
-;;; ((qduration . payload)
-;;; ;; (print "Server pkt:" (alist-ref 'ipaddr servpkt) ":" (alist-ref 'port servpkt)
-;;; ;; (if payload
-;;; ;; "Success" "Fail"))
-;;; (match payload
-;;; ((host port stats)
-;;; ;; (print "From " host ":" port " got stats: " stats)
-;;; (if (and host port stats)
-;;; (let ((url (conc host ":" port)))
-;;; (hash-table-set! hostshash sid servpkt)
-;;; ;; store based on host:port
-;;; (hash-table-set! (area-hoststats acfg) sid stats))
-;;; (print "missing data from the server, not sure what that means!"))
-;;; (set! numsrvs (+ numsrvs 1)))
-;;; (#f
-;;; (print "Removing pkt " sid " due to #f from server or failed ping")
-;;; (delpkt pktsdir sid))
-;;; (else
-;;; (print "Got ")(pp res)(print " from server ")(pp servpkt) " but response did not match (#f/#t . msg)")))
-;;; (else
-;;; ;; here we delete the pkt - can't reach the server, remove it
-;;; ;; however this logic is inadequate. we should mark the server as checked
-;;; ;; and not good, if it happens a second time - then remove the pkt
-;;; ;; or something similar. I.e. don't be too quick to assume the server is wedged or dead
-;;; ;; could be it is simply too busy to reply
-;;; (let ((bad-pings (hash-table-ref/default (area-health acfg) url 0)))
-;;; (if (> bad-pings 1) ;; two bad pings - remove pkt
-;;; (begin
-;;; (print "INFO: " bad-pings " bad responses from " url ", deleting pkt " sid)
-;;; (delpkt pktsdir sid))
-;;; (begin
-;;; (print "INFO: " bad-pings " bad responses from " shost ":" sport " not deleting pkt yet")
-;;; (hash-table-set! (area-health acfg)
-;;; url
-;;; (+ (hash-table-ref/default (area-health acfg) url 0) 1))
-;;; ))
-;;; ))))
-;;; ;; servpkt is not actually a pkt?
-;;; (begin
-;;; (print "Bad pkt " servpkt))))
-;;; all-pkts)
-;;; (sdbg> "update-known-servers" "end" start-time #f #f " found " numsrvs
-;;; " servers, pkts: " (map (lambda (p)
-;;; (alist-ref 'Z p))
-;;; all-pkts))
-;;; numsrvs))
-;;;
-;;; (defstruct srvstat
-;;; (numfiles 0) ;; number of db files handled by this server - subtract 1 for the db being currently looked at
-;;; (randnum #f) ;; tie breaker number assigned to by the server itself - applies only to the db under consideration
-;;; (pkt #f)) ;; the server pkt
-;;;
-;;; ;;(define (srv->srvstat srvpkt)
-;;;
-;;; ;; Get the server best for given dbname and key
-;;; ;;
-;;; ;; NOTE: key is not currently used. The key points to the kind of query, this may be useful for directing read-only queries.
-;;; ;;
-;;; (define (get-best-server acfg dbname key)
-;;; (let* (;; (servers (hash-table-values (area-hosts acfg)))
-;;; (servers (area-hosts acfg))
-;;; (skeys (sort (hash-table-keys servers) string>=?)) ;; a stable listing
-;;; (start-time (current-milliseconds))
-;;; (srvstats (make-hash-table)) ;; srvid => srvstat
-;;; (url (conc (area-myaddr acfg) ":" (area-port acfg))))
-;;; ;; (print "scores for " dbname ": " (map (lambda (k)(cons k (calc-server-score acfg dbname k))) skeys))
-;;; (if (null? skeys)
-;;; (if (> (update-known-servers acfg) 0)
-;;; (get-best-server acfg dbname key) ;; some risk of infinite loop here, TODO add try counter
-;;; (begin
-;;; (print "ERROR: no server found!") ;; since this process is also a server this should never happen
-;;; #f))
-;;; (begin
-;;; ;; (print "in get-best-server with skeys=" skeys)
-;;; (if (> (- (current-seconds) (area-last-srvup acfg)) 10)
-;;; (begin
-;;; (update-known-servers acfg)
-;;; (sdbg> "get-best-server" "update-known-servers" start-time #f #f)))
-;;;
-;;; ;; for each server look at the list of dbfiles, total number of dbs being handled
-;;; ;; and the rand number, save the best host
-;;; ;; also do a delist-db for each server dbfile not used
-;;; (let* ((best-server #f)
-;;; (servers-to-delist (make-hash-table)))
-;;; (for-each
-;;; (lambda (srvid)
-;;; (let* ((server (hash-table-ref/default servers srvid #f))
-;;; (stats (hash-table-ref/default (area-hoststats acfg) srvid '(()))))
-;;; ;; (print "stats: " stats)
-;;; (if server
-;;; (let* ((dbweights (car stats))
-;;; (srvload (length (filter (lambda (x)(not (equal? dbname (car x)))) dbweights)))
-;;; (dbrec (alist-ref dbname dbweights equal?)) ;; get the pair with fname . randscore
-;;; (randnum (if dbrec
-;;; dbrec ;; (cdr dbrec)
-;;; 0)))
-;;; (hash-table-set! srvstats srvid (make-srvstat numfiles: srvload randnum: randnum pkt: server))))))
-;;; skeys)
-;;;
-;;; (let* ((sorted (sort (hash-table-values srvstats)
-;;; (lambda (a b)
-;;; (let ((numfiles-a (srvstat-numfiles a))
-;;; (numfiles-b (srvstat-numfiles b))
-;;; (randnum-a (srvstat-randnum a))
-;;; (randnum-b (srvstat-randnum b)))
-;;; (if (< numfiles-a numfiles-b) ;; Note, I don't think adding an offset works here. Goal was only move file handling to a different server if it has 2 less
-;;; #t
-;;; (if (and (equal? numfiles-a numfiles-b)
-;;; (< randnum-a randnum-b))
-;;; #t
-;;; #f))))))
-;;; (best (if (null? sorted)
-;;; (begin
-;;; (print "ERROR: should never be null due to self as server.")
-;;; #f)
-;;; (srvstat-pkt (car sorted)))))
-;;; #;(print "SERVER(" url "): " dbname ": " (map (lambda (srv)
-;;; (let ((p (srvstat-pkt srv)))
-;;; (conc (alist-ref 'ipaddr p) ":" (alist-ref 'port p)
-;;; "(" (srvstat-numfiles srv)","(srvstat-randnum srv)")")))
-;;; sorted))
-;;; best))))))
-;;;
-;;; ;; send out an "I'm about to exit notice to all known servers"
-;;; ;;
-;;; (define (death-imminent acfg)
-;;; '())
-;;;
-;;; ;;======================================================================
-;;; ;; U L E X - T H E I N T E R E S T I N G S T U F F ! !
-;;; ;;======================================================================
-;;;
-;;; ;; register a handler
-;;; ;; NOTES:
-;;; ;; dbinitsql is reserved for a list of sql statements for initializing the db
-;;; ;; dbinitfn is reserved for a db init function, if exists called after dbinitsql
-;;; ;;
-;;; (define (register acfg key obj #!optional (ctype 'dbwrite))
-;;; (let ((ht (area-rtable acfg)))
-;;; (if (hash-table-exists? ht key)
-;;; (print "WARNING: redefinition of entry " key))
-;;; (hash-table-set! ht key (make-calldat obj: obj ctype: ctype))))
-;;;
-;;; ;; usage: register-batch acfg '((key1 . sql1) (key2 . sql2) ... )
-;;; ;; NB// obj is often an sql query
-;;; ;;
-;;; (define (register-batch acfg ctype data)
-;;; (let ((ht (area-rtable acfg)))
-;;; (map (lambda (dat)
-;;; (hash-table-set! ht (car dat)(make-calldat obj: (cdr dat) ctype: ctype)))
-;;; data)))
-;;;
-;;; (define (initialize-area-calls-from-specfile area specfile)
-;;; (let* ((callspec (with-input-from-file specfile read )))
-;;; (for-each (lambda (group)
-;;; (register-batch
-;;; area
-;;; (car group)
-;;; (cdr group)))
-;;; callspec)))
-;;;
-;;; ;; get-rentry
-;;; ;;
-;;; (define (get-rentry acfg key)
-;;; (hash-table-ref/default (area-rtable acfg) key #f))
-;;;
-;;; (define (get-rsql acfg key)
-;;; (let ((cdat (get-rentry acfg key)))
-;;; (if cdat
-;;; (calldat-obj cdat)
-;;; #f)))
-;;;
-;;;
-;;;
-;;; ;; blocking call:
-;;; ;; client server
-;;; ;; ------ ------
-;;; ;; call()
-;;; ;; send-message()
-;;; ;; nmsg-send()
-;;; ;; nmsg-receive()
-;;; ;; nmsg-respond(ack,cookie)
-;;; ;; ack, cookie
-;;; ;; mbox-thread-wait(cookie)
-;;; ;; nmsg-send(client,cookie,result)
-;;; ;; nmsg-respond(ack)
-;;; ;; return result
-;;; ;;
-;;; ;; reserved action:
-;;; ;; 'immediate
-;;; ;; 'dbinitsql
-;;; ;;
-;;; (define (call acfg dbname action params #!optional (count 0))
-;;; (let* ((call-start-time (current-milliseconds))
-;;; (srv (get-best-server acfg dbname action))
-;;; (post-get-start-time (current-milliseconds))
-;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f))
-;;; (myid (trim-pktid (area-pktid acfg)))
-;;; (srvid (trim-pktid (alist-ref 'Z srv)))
-;;; (cookie (make-cookie myid)))
-;;; (sdbg> "call" "get-best-server" call-start-time #f call-start-time " from: " myid " to server: " srvid " for " dbname " action: " action " params: " params " rdat: " rdat)
-;;; (print "INFO: call to " (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv) " from " (area-myaddr acfg) ":" (area-port acfg) " for " dbname)
-;;; (if (and srv rdat) ;; need both to dispatch a request
-;;; (let* ((ripaddr (alist-ref 'ipaddr srv))
-;;; (rsrvid (alist-ref 'Z srv))
-;;; (rport (any->number (alist-ref 'port srv)))
-;;; (res-full (if (and (equal? ripaddr (area-myaddr acfg))
-;;; (equal? rport (area-port acfg)))
-;;; (request acfg ripaddr rport (area-pktid acfg) action cookie dbname params)
-;;; (safe-call 'request ripaddr rport
-;;; (area-myaddr acfg)
-;;; (area-port acfg)
-;;; #;(area-pktid acfg)
-;;; rsrvid
-;;; action cookie dbname params))))
-;;; ;; (print "res-full: " res-full)
-;;; (match res-full
-;;; ((response-ok response-msg rem ...)
-;;; (let* ((send-message-time (current-milliseconds))
-;;; ;; (match res-full
-;;; ;; ((response-ok response-msg)
-;;; ;; (response-ok (car res-full))
-;;; ;; (response-msg (cadr res-full)
-;;; )
-;;; ;; (res (take res-full 3))) ;; ctype == action, TODO: converge on one term <<=== what was this? BUG
-;;; ;; (print "ulex:call: send-message took " (- send-message-time post-get-start-time) " ms params=" params)
-;;; (sdbg> "call" "send-message" post-get-start-time #f call-start-time)
-;;; (cond
-;;; ((not response-ok) #f)
-;;; ((member response-msg '("db read submitted" "db write submitted"))
-;;; (let* ((cookie-id (cadddr res-full))
-;;; (mbox (make-mailbox))
-;;; (mbox-time (current-milliseconds)))
-;;; (hash-table-set! (area-cookie2mbox acfg) cookie-id mbox)
-;;; (let* ((mbox-timeout-secs 20)
-;;; (mbox-timeout-result 'MBOX_TIMEOUT)
-;;; (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
-;;; (mbox-receive-time (current-milliseconds)))
-;;; (hash-table-delete! (area-cookie2mbox acfg) cookie-id)
-;;; (sdbg> "call" "mailbox-receive" mbox-time #f call-start-time " from: " myid " to server: " srvid " for " dbname)
-;;; ;; (print "ulex:call mailbox-receive took " (- mbox-receive-time mbox-time) "ms params=" params)
-;;; res)))
-;;; (else
-;;; (print "Unhandled response \""response-msg"\"")
-;;; #f))
-;;; ;; depending on what action (i.e. ctype) is we will block here waiting for
-;;; ;; all the data (mechanism to be determined)
-;;; ;;
-;;; ;; if res is a "working on it" then wait
-;;; ;; wait for result
-;;; ;; mailbox thread wait on
-;;;
-;;; ;; if res is a "can't help you" then try a different server
-;;; ;; if res is a "ack" (e.g. for one-shot requests) then return res
-;;; ))
-;;; (else
-;;; (if (< count 10)
-;;; (let* ((url (conc (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv))))
-;;; (thread-sleep! 1)
-;;; (print "ERROR: Bad result from " url ", dbname: " dbname ", action: " action ", params: " params ". Trying again in 1 second.")
-;;; (call acfg dbname action params (+ count 1)))
-;;; (begin
-;;; (error (conc "ERROR: " count " tries, still have improper response res-full=" res-full)))))))
-;;; (begin
-;;; (if (not rdat)
-;;; (print "ERROR: action " action " not registered.")
-;;; (if (< count 10)
-;;; (begin
-;;; (thread-sleep! 1)
-;;; (area-hosts-set! acfg (make-hash-table)) ;; clear out all known hosts
-;;; (print "ERROR: no server found, srv=" srv ", trying again in 1 seconds")
-;;; (call acfg dbname action params (+ count 1)))
-;;; (begin
-;;; (error (conc "ERROR: no server found after 10 tries, srv=" srv ", giving up."))
-;;; #;(error "No server available"))))))))
-;;;
-;;;
-;;; ;;======================================================================
-;;; ;; U T I L I T I E S
-;;; ;;======================================================================
-;;;
-;;; ;; get a signature for identifing this process
-;;; ;;
-;;; (define (get-process-signature)
-;;; (cons (get-host-name)(current-process-id)))
-;;;
-;;; ;;======================================================================
-;;; ;; S Y S T E M S T U F F
-;;; ;;======================================================================
-;;;
-;;; ;; get normalized cpu load by reading from /proc/loadavg and
-;;; ;; /proc/cpuinfo return all three values and the number of real cpus
-;;; ;; and the number of threads returns alist '((adj-cpu-load
-;;; ;; . normalized-proc-load) ... etc. keys: adj-proc-load,
-;;; ;; adj-core-load, 1m-load, 5m-load, 15m-load
-;;; ;;
-;;; (define (get-normalized-cpu-load)
-;;; (let ((res (get-normalized-cpu-load-raw))
-;;; (default `((adj-proc-load . 2) ;; there is no right answer
-;;; (adj-core-load . 2)
-;;; (1m-load . 2)
-;;; (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong
-;;; (15m-load . 0)
-;;; (proc . 1)
-;;; (core . 1)
-;;; (phys . 1)
-;;; (error . #t))))
-;;; (cond
-;;; ((and (list? res)
-;;; (> (length res) 2))
-;;; res)
-;;; ((eq? res #f) default) ;; add messages?
-;;; ((eq? res #f) default) ;; this would be the #eof
-;;; (else default))))
-;;;
-;;; (define (get-normalized-cpu-load-raw)
-;;; (let* ((actual-host (get-host-name))) ;; #f is localhost
-;;; (let ((data (append
-;;; (with-input-from-file "/proc/loadavg" read-lines)
-;;; (with-input-from-file "/proc/cpuinfo" read-lines)
-;;; (list "end")))
-;;; (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
-;;; (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
-;;; (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
-;;; (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
-;;; (max-num (lambda (p n)(max (string->number p) n))))
-;;; ;; (print "data=" data)
-;;; (if (null? data) ;; something went wrong
-;;; #f
-;;; (let loop ((hed (car data))
-;;; (tal (cdr data))
-;;; (loads #f)
-;;; (proc-num 0) ;; processor includes threads
-;;; (phys-num 0) ;; physical chip on motherboard
-;;; (core-num 0)) ;; core
-;;; ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
-;;; (if (null? tal) ;; have all our data, calculate normalized load and return result
-;;; (let* ((act-proc (+ proc-num 1))
-;;; (act-phys (+ phys-num 1))
-;;; (act-core (+ core-num 1))
-;;; (adj-proc-load (/ (car loads) act-proc))
-;;; (adj-core-load (/ (car loads) act-core))
-;;; (result
-;;; (append (list (cons 'adj-proc-load adj-proc-load)
-;;; (cons 'adj-core-load adj-core-load))
-;;; (list (cons '1m-load (car loads))
-;;; (cons '5m-load (cadr loads))
-;;; (cons '15m-load (caddr loads)))
-;;; (list (cons 'proc act-proc)
-;;; (cons 'core act-core)
-;;; (cons 'phys act-phys)))))
-;;; result)
-;;; (regex-case
-;;; hed
-;;; (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
-;;; (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num))
-;;; (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num))
-;;; (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num)))
-;;; (else
-;;; (begin
-;;; ;; (print "NO MATCH: " hed)
-;;; (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))
-;;;
-;;; (define (get-host-stats acfg)
-;;; (let ((stats-hash (area-stats acfg)))
-;;; ;; use this opportunity to remove references to dbfiles which have not been accessed in a while
-;;; (for-each
-;;; (lambda (dbname)
-;;; (let* ((stats (hash-table-ref stats-hash dbname))
-;;; (last-access (stat-when stats)))
-;;; (if (and (> last-access 0) ;; if zero then there has been no access
-;;; (> (- (current-seconds) last-access) 10)) ;; not used in ten seconds
-;;; (begin
-;;; (print "Removing " dbname " from stats list")
-;;; (hash-table-delete! stats-hash dbname) ;; remove from stats hash
-;;; (stat-dbs-set! stats (hash-table-keys stats))))))
-;;; (hash-table-keys stats-hash))
-;;;
-;;; `(,(hash-table->alist (area-dbs acfg)) ;; dbname => randnum
-;;; ,(map (lambda (dbname) ;; dbname is the db name
-;;; (cons dbname (stat-when (hash-table-ref stats-hash dbname))))
-;;; (hash-table-keys stats-hash))
-;;; (cpuload . ,(get-normalized-cpu-load)))))
-;;; #;(stats . ,(map (lambda (k) ;; create an alist from the stats data
-;;; (cons k (stat->alist (hash-table-ref (area-stats acfg) k))))
-;;; (hash-table-keys (area-stats acfg))))
-;;;
-;;; #;(trace
-;;; ;; assv
-;;; ;; cdr
-;;; ;; caar
-;;; ;; ;; cdr
-;;; ;; call
-;;; ;; finalize-all-db-handles
-;;; ;; get-all-server-pkts
-;;; ;; get-normalized-cpu-load
-;;; ;; get-normalized-cpu-load-raw
-;;; ;; launch
-;;; ;; nmsg-send
-;;; ;; process-db-queries
-;;; ;; receive-message
-;;; ;; std-peer-handler
-;;; ;; update-known-servers
-;;; ;; work-queue-processor
-;;; )
-;;;
-;;; ;;======================================================================
-;;; ;; netutil
-;;; ;; move this back to ulex-netutil.scm someday?
-;;; ;;======================================================================
-;;;
-;;; ;; #include
-;;; ;; #include
-;;; ;; #include
-;;; ;; #include
-;;;
-;;; (foreign-declare "#include \"sys/types.h\"")
-;;; (foreign-declare "#include \"sys/socket.h\"")
-;;; (foreign-declare "#include \"ifaddrs.h\"")
-;;; (foreign-declare "#include \"arpa/inet.h\"")
-;;;
-;;; ;; get IP addresses from ALL interfaces
-;;; (define get-all-ips
-;;; (foreign-safe-lambda* scheme-object ()
-;;; "
-;;;
-;;; // from https://stackoverflow.com/questions/17909401/linux-c-get-default-interfaces-ip-address :
-;;;
-;;;
-;;; C_word lst = C_SCHEME_END_OF_LIST, len, str, *a;
-;;; // struct ifaddrs *ifa, *i;
-;;; // struct sockaddr *sa;
-;;;
-;;; struct ifaddrs * ifAddrStruct = NULL;
-;;; struct ifaddrs * ifa = NULL;
-;;; void * tmpAddrPtr = NULL;
-;;;
-;;; if ( getifaddrs(&ifAddrStruct) != 0)
-;;; C_return(C_SCHEME_FALSE);
-;;;
-;;; // for (i = ifa; i != NULL; i = i->ifa_next) {
-;;; for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) {
-;;; if (ifa->ifa_addr->sa_family==AF_INET) { // Check it is
-;;; // a valid IPv4 address
-;;; tmpAddrPtr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
-;;; char addressBuffer[INET_ADDRSTRLEN];
-;;; inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN);
-;;; // printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer);
-;;; len = strlen(addressBuffer);
-;;; a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len));
-;;; str = C_string(&a, len, addressBuffer);
-;;; lst = C_a_pair(&a, str, lst);
-;;; }
-;;;
-;;; // else if (ifa->ifa_addr->sa_family==AF_INET6) { // Check it is
-;;; // // a valid IPv6 address
-;;; // tmpAddrPtr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
-;;; // char addressBuffer[INET6_ADDRSTRLEN];
-;;; // inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN);
-;;; //// printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer);
-;;; // len = strlen(addressBuffer);
-;;; // a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len));
-;;; // str = C_string(&a, len, addressBuffer);
-;;; // lst = C_a_pair(&a, str, lst);
-;;; // }
-;;;
-;;; // else {
-;;; // printf(\" not an IPv4 address\\n\");
-;;; // }
-;;;
-;;; }
-;;;
-;;; freeifaddrs(ifa);
-;;; C_return(lst);
-;;;
-;;; "))
-;;;
-;;; ;; Change this to bias for addresses with a reasonable broadcast value?
-;;; ;;
-;;; (define (ip-pref-less? a b)
-;;; (let* ((rate (lambda (ipstr)
-;;; (regex-case ipstr
-;;; ( "^127\\." _ 0 )
-;;; ( "^(10\\.0|192\\.168\\.)\\..*" _ 1 )
-;;; ( else 2 ) ))))
-;;; (< (rate a) (rate b))))
-;;;
-;;;
-;;; (define (get-my-best-address)
-;;; (let ((all-my-addresses (get-all-ips))
-;;; ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))
-;;; )
-;;; (cond
-;;; ((null? all-my-addresses)
-;;; (get-host-name)) ;; no interfaces?
-;;; ((eq? (length all-my-addresses) 1)
-;;; (car all-my-addresses)) ;; only one to choose from, just go with it
-;;;
-;;; (else
-;;; (car (sort all-my-addresses ip-pref-less?)))
-;;; ;; (else
-;;; ;; (ip->string (car (filter (lambda (x) ;; take any but 127.
-;;; ;; (not (eq? (u8vector-ref x 0) 127)))
-;;; ;; all-my-addresses))))
-;;;
-;;; )))
-;;;
-;;; (define (get-all-ips-sorted)
-;;; (sort (get-all-ips) ip-pref-less?))
-;;;
-;;;
-
+ (map address-info-host
+ (filter (lambda (x)
+ (equal? (address-info-type x) "tcp"))
+ (address-infos (get-host-name)))))
+
+)
Index: utils/nbfake
==================================================================
--- utils/nbfake
+++ utils/nbfake
@@ -39,10 +39,11 @@
nbfake behavior can be changed by setting the following env vars:
NBFAKE_HOST SSH to \$NBFAKE_HOST and run command
NBFAKE_LOG Logfile for nbfake output
NB_WASH_GROUPS comma-separated list of groups to wash into
NB_WASH_ENABLED must be set in order to enable wash groups
+ NBFAKE_QUIET set to suppress informational output
__EOF
exit
fi
@@ -87,19 +88,21 @@
#==============================================================================
# Run and log
#==============================================================================
+if [[ -z "$NBFAKE_QUIET" ]];then
cat <<__EOF >&2
#======================================================================
# NBFAKE logging command to: $MY_NBFAKE_LOG
# $WASHCMD $*
#======================================================================
__EOF
+fi
if [[ -z "$MY_NBFAKE_HOST" ]]; then
# Run locally
sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &"
else
# run remotely
ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\""
fi