Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,13 +28,23 @@
ezsteps.scm lock-queue.scm rmt.scm api.scm \
subrun.scm portlogger.scm archive.scm env.scm \
diff-report.scm cgisetup/models/pgdb.scm
# module source files
-MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm
+MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
+ tcp-transportmod.scm
all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
+
+transport-mode.scm : transport-mode.scm.template
+ cp transport-mode.scm.template transport-mode.scm
+
+dashboard-transport-mode.scm : transport-mode.scm.template
+ cp transport-mode.scm.template dashboard-transport-mode.scm
+
+megatest.scm : transport-mode.scm
+dashboard.scm : dashboard-transport-mode.scm
# dbmod.import.o is just a hack here
mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o
Index: TODO
==================================================================
--- TODO
+++ TODO
@@ -16,10 +16,13 @@
# along with Megatest. If not, see .
TODO
====
+23WW07
+. Remove use of *dbstruct-dbs*
+
WW15
. fill newview matrix with data, filter pipeline gui elements
. improve [script], especially indent handling
WW16
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -18,21 +18,26 @@
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
;;======================================================================
-(use srfi-69 posix)
-
(declare (unit api))
(declare (uses rmt))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses tasks))
+(declare (uses tcp-transportmod))
(import dbmod)
(import dbfile)
+(import tcp-transportmod)
+
+(use srfi-69
+ posix
+ matchable
+ s11n)
;; allow these queries through without starting a server
;;
(define api:read-only-queries
'(get-key-val-pairs
@@ -142,93 +147,137 @@
tasks-add
tasks-set-state-given-param-key
))
(define *db-write-mutexes* (make-hash-table))
-
+(define *server-signature* #f)
;; These are called by the server on recipt of /api calls
;; - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;; - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
(db:open-no-sync-db) ;; sets *no-sync-db*
-;; (handle-exceptions
-;; exn
-;; (let ((call-chain (get-call-chain)))
-;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
-;; (print-call-chain (current-error-port))
-;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+ ;; (handle-exceptions
+ ;; exn
+ ;; (let ((call-chain (get-call-chain)))
+ ;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
+ ;; (print-call-chain (current-error-port))
+ ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
(if (> *api-process-request-count* 200)
(begin
(if (common:low-noise-print 30 "too many threads")
(debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay."))
(thread-sleep! 0.5) ;; take a nap
))
- (cond
- ((not (vector? dat)) ;; it is an error to not receive a vector
- (vector #f (vector #f "remote must be called with a vector")))
- #;((> *api-process-request-count* 200) ;; 20)
- (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
- (set! *server-overloaded* #t)
- (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
- (else
- (let* ((cmd-in (vector-ref dat 0))
- (cmd (if (symbol? cmd-in)
- cmd-in
- (string->symbol cmd-in)))
- (params (vector-ref dat 1))
- (run-id (if (null? params)
- 0
- (car params)))
- (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id)
- (hash-table-ref *db-write-mutexes* run-id)
- (let* ((newmutex (make-mutex)))
- (hash-table-set! *db-write-mutexes* run-id newmutex)
- newmutex)))
- (start-t (current-milliseconds))
- (readonly-mode (dbr:dbstruct-read-only dbstruct))
- (readonly-command (member cmd api:read-only-queries))
- (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
- (if (not readonly-command)
- (mutex-lock! write-mutex))
- (let* ((tmppath (dbr:dbstruct-tmppath dbstruct))
- (clean-run-id (cond
- ((number? run-id) run-id)
- ((equal? run-id #f) "main")
- (else "other")))
- (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params)))
- (res
- (if writecmd-in-readonly-mode
- (conc "attempt to run write command "cmd" on a read-only database")
- (api:dispatch-request dbstruct cmd run-id params))))
- (delete-file* crumbfile)
- (if (not readonly-command)
- (mutex-unlock! write-mutex))
-
- ;; save all stats
- (let ((delta-t (- (current-milliseconds)
- start-t))
- (modified-cmd (if (eq? cmd 'general-call)
- (string->symbol (conc "general-call-" (car params)))
- cmd)))
- (hash-table-set! *db-api-call-time* modified-cmd
- (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
- (if writecmd-in-readonly-mode
- (begin
- #;(common:telemetry-log (conc "api-out:"(->string cmd))
- payload: `((params . ,params)
- (ok-res . #t)))
- (vector #f res))
- (begin
- #;(common:telemetry-log (conc "api-out:"(->string cmd))
- payload: `((params . ,params)
- (ok-res . #f)))
- (vector #t res))))))))
+ (cond
+ ((not (vector? dat)) ;; it is an error to not receive a vector
+ (vector #f (vector #f "remote must be called with a vector")))
+ #;((> *api-process-request-count* 200) ;; 20)
+ (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
+ (set! *server-overloaded* #t)
+ (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
+ (else
+ (let* ((cmd-in (vector-ref dat 0))
+ (cmd (if (symbol? cmd-in)
+ cmd-in
+ (string->symbol cmd-in)))
+ (params (vector-ref dat 1))
+ (run-id (if (null? params)
+ 0
+ (car params)))
+ (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id)
+ (hash-table-ref *db-write-mutexes* run-id)
+ (let* ((newmutex (make-mutex)))
+ (hash-table-set! *db-write-mutexes* run-id newmutex)
+ newmutex)))
+ (start-t (current-milliseconds))
+ (readonly-mode (dbr:dbstruct-read-only dbstruct))
+ (readonly-command (member cmd api:read-only-queries))
+ (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
+ (if (not readonly-command)
+ (mutex-lock! write-mutex))
+ (let* ((tmppath (dbr:dbstruct-tmppath dbstruct))
+ (clean-run-id (cond
+ ((number? run-id) run-id)
+ ((equal? run-id #f) "main")
+ (else "other")))
+ (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params)))
+ (res
+ (if writecmd-in-readonly-mode
+ (conc "attempt to run write command "cmd" on a read-only database")
+ (api:dispatch-request dbstruct cmd run-id params))))
+ (delete-file* crumbfile)
+ (if (not readonly-command)
+ (mutex-unlock! write-mutex))
+
+ ;; save all stats
+ (let ((delta-t (- (current-milliseconds)
+ start-t))
+ (modified-cmd (if (eq? cmd 'general-call)
+ (string->symbol (conc "general-call-" (car params)))
+ cmd)))
+ (hash-table-set! *db-api-call-time* modified-cmd
+ (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
+ (if writecmd-in-readonly-mode
+ (begin
+ #;(common:telemetry-log (conc "api-out:"(->string cmd))
+ payload: `((params . ,params)
+ (ok-res . #t)))
+ (vector #f res))
+ (begin
+ #;(common:telemetry-log (conc "api-out:"(->string cmd))
+ payload: `((params . ,params)
+ (ok-res . #f)))
+ (vector #t res))))))))
+
+;; indat is (cmd run-id params meta)
+;;
+;; WARNING: Do not print anything in the lambda of this function as it
+;; reads/writes to current in/out port
+;;
+(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params)
+ (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.")
+ (if (not *server-signature*)
+ (set! *server-signature* (tt:mk-signature *toppath*)))
+ (lambda ()
+ (let* ((indat (deserialize))
+ (newcount (+ *api-process-request-count* 1))
+ (delay-wait (if (> newcount 10)
+ (- newcount 10)
+ 0)))
+ (set! *api-process-request-count* newcount)
+ (set! *db-last-access* (current-seconds))
+ (match indat
+ ((cmd run-id params meta)
+ (let* ((status (cond
+ ((> newcount 30) 'busy)
+ ((> newcount 15) 'loaded)
+ (else 'ok)))
+ (errmsg (case status
+ ((busy) (conc "Server overloaded, "newcount" threads in flight"))
+ ((loaded) (conc "Server loaded, "newcount" threads in flight"))
+ (else #f)))
+ (result (case status
+ ((busy) (- newcount 29))
+ ((loaded) #f)
+ (else
+ (case cmd
+ ((ping) *server-signature*)
+ (else
+ (api:dispatch-request dbstruct cmd run-id params))))))
+ (meta `((wait . ,delay-wait)))
+ (payload (list status errmsg result meta)))
+ (set! *api-process-request-count* (- *api-process-request-count* 1))
+ (serialize payload)))
+ (else
+ (assert #f "FATAL: failed to deserialize indat "indat))))))
+
(define (api:dispatch-request dbstruct cmd run-id params)
+ (db:open-no-sync-db)
(case cmd
;;===============================================
;; READ/WRITE QUERIES
;;===============================================
Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -104,11 +104,11 @@
(debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
(if (not runremote)
(begin
;; Here we are creating a runremote where there was none or it was clobbered with #f
;;
- (set! runremote (make-remote))
+ (set! runremote (make-and-init-remote))
(let* ((server-info (server:check-if-running areapath)))
(remote-server-info-set! runremote server-info)
(if server-info
(begin
(remote-server-url-set! runremote (server:record->url server-info))
Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -249,33 +249,10 @@
(define (common:get-sync-lock-filepath)
(let* ((tmp-area (common:get-db-tmp-area))
(lockfile (conc tmp-area "/megatest.db.sync-lock")))
lockfile))
-;;======================================================================
-;; when called from a wrapper I need sometimes to find the calling
-;; wrapper, this is for dashboard to find the correct megatest.
-;;
-(define (common:find-local-megatest #!optional (progname "megatest"))
- (let ((res (filter file-exists?
- (map (lambda (updir)
- (let* ((lm (car (argv)))
- (dir (pathname-directory lm))
- (exe (pathname-strip-directory lm)))
- (conc (if dir (conc dir "/") "")
- (case (string->symbol exe)
- ((dboard) (conc updir progname))
- ((mtest) (conc updir progname))
- ((dashboard) progname)
- (else exe)))))
- '("../../" "../")))))
- (if (null? res)
- (begin
- (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
- progname)
- (car res))))
-
(define *common:logpro-exit-code->status-sym-alist*
'( ( 0 . pass )
( 1 . fail )
( 2 . warn )
( 3 . check )
@@ -998,25 +975,19 @@
#f)))
(define (common:get-area-path-signature)
(message-digest-string (md5-primitive) *toppath*))
-(define (common:get-signature str)
- (message-digest-string (md5-primitive) str))
-
;;======================================================================
;; E X I T H A N D L I N G
;;======================================================================
(define (common:run-sync?)
(and *toppath* ;; gate if called before *toppath* is set
(common:on-homehost?)
(args:get-arg "-server")))
-(define (common:human-time)
- (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
-
(define (std-signal-handler signum)
;; (signal-mask! signum)
(set! *time-to-exit* #t)
;;(debug:print-info 13 *default-log-port* "got signal "signum)
@@ -2043,16 +2014,20 @@
(begin
(debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.")
(thread-sleep! 30)
(if (< (- (current-seconds) start-time) 300)
(loop start-time)))))
- (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
- #f
- (server:choose-server *toppath* 'homehost)))
- (hh (if hh-dat (car hh-dat) #f)))
- (common:wait-for-normalized-load maxnormload msg hh)))
-
+ (case (rmt:transport-mode)
+ ((http)
+ (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
+ #f
+ (server:choose-server *toppath* 'homehost)))
+ (hh (if hh-dat (car hh-dat) #f)))
+ (common:wait-for-normalized-load maxnormload msg hh)))
+ (else
+ (common:wait-for-normalized-load maxnormload msg (get-host-name)))))
+
(define (common:get-num-cpus remote-host)
(let* ((actual-host (or remote-host (get-host-name))))
;; hosts had better not be changing the number of cpus too often!
(or (hash-table-ref/default *numcpus-cache* actual-host #f)
(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
@@ -2653,291 +2628,10 @@
(cond
(with-vars (common:without-vars fullcmd))
(with-orig-env (common:with-orig-env fullcmd))
(else (common:without-vars fullcmd "MT_.*")))))
-;;======================================================================
-;; T I M E A N D D A T E
-;;======================================================================
-
-;;======================================================================
-;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
-(define (common:hms-string->seconds tstr)
- (let ((parts (string-split-fields "\\w+" tstr))
- (time-secs 0)
- ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
- (trx (regexp "(\\d+)([smhdMyw])")))
- (for-each (lambda (part)
- (let ((match (string-match trx part)))
- (if match
- (let ((val (string->number (cadr match)))
- (unt (caddr match)))
- (if val
- (set! time-secs (+ time-secs (* val
- (case (string->symbol unt)
- ((s) 1)
- ((m) 60) ;; minutes
- ((h) 3600)
- ((d) 86400)
- ((w) 604800)
- ((M) 2628000) ;; aproximately one month
- ((y) 31536000)
- (else #f))))))))))
- parts)
- time-secs))
-
-(define (seconds->hr-min-sec secs)
- (let* ((hrs (quotient secs 3600))
- (min (quotient (- secs (* hrs 3600)) 60))
- (sec (- secs (* hrs 3600)(* min 60))))
- (conc (if (> hrs 0)(conc hrs "hr ") "")
- (if (> min 0)(conc min "m ") "")
- sec "s")))
-
-(define (seconds->time-string sec)
- (time->string
- (seconds->local-time sec) "%H:%M:%S"))
-
-(define (seconds->work-week/day-time sec)
- (time->string
- (seconds->local-time sec) "ww%V.%u %H:%M"))
-
-(define (seconds->work-week/day sec)
- (time->string
- (seconds->local-time sec) "ww%V.%u"))
-
-(define (seconds->year-work-week/day sec)
- (time->string
- (seconds->local-time sec) "%yww%V.%w"))
-
-(define (seconds->year-work-week/day-time sec)
- (time->string
- (seconds->local-time sec) "%Yww%V.%w %H:%M"))
-
-(define (seconds->year-week/day-time sec)
- (time->string
- (seconds->local-time sec) "%Yw%V.%w %H:%M"))
-
-(define (seconds->quarter sec)
- (case (string->number
- (time->string
- (seconds->local-time sec)
- "%m"))
- ((1 2 3) 1)
- ((4 5 6) 2)
- ((7 8 9) 3)
- ((10 11 12) 4)
- (else #f)))
-
-;;======================================================================
-;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
-;;
-(define (common:date-time->seconds datetime)
- (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))
-
-;;======================================================================
-;; given span of seconds tstart to tend
-;; find start time to mark and mark delta
-;;
-(define (common:find-start-mark-and-mark-delta tstart tend)
- (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
- (result #f)
- (min 60)
- (hr (* 60 60))
- (day (* 24 hr))
- (yr (* 365 day)) ;; year
- (mo (/ yr 12))
- (wk (* day 7)))
- (for-each
- (lambda (max-blks)
- (for-each
- (lambda (span) ;; 5 2 1
- (if (not result)
- (for-each
- (lambda (timeunit timesym) ;; year month day hr min sec
- (if (not result)
- (let* ((time-blk (* span timeunit))
- (num-blks (quotient deltat time-blk)))
- (if (and (> num-blks 4)(< num-blks max-blks))
- (let ((first (* (quotient tstart time-blk) time-blk)))
- (set! result (list span timeunit time-blk first timesym))
- )))))
- (list yr mo wk day hr min 1)
- '( y mo w d h m s))))
- (list 8 6 5 2 1)))
- '(5 10 15 20 30 40 50 500))
- (if values
- (apply values result)
- (values 0 day 1 0 'd))))
-
-;;======================================================================
-;; given x y lim return the cron expansion
-;;
-(define (common:expand-cron-slash x y lim)
- (let loop ((curr x)
- (res `()))
- (if (< curr lim)
- (loop (+ curr y) (cons curr res))
- (reverse res))))
-
-;;======================================================================
-;; expand a complex cron string to a list of cron strings
-;;
-;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c
-;;
-;; NOTE: with flatten a lot of the crud below can be factored down.
-;;
-(define (common:cron-expand cron-str)
- (if (list? cron-str)
- (flatten
- (fold (lambda (x res)
- (if (list? x)
- (let ((newres (map common:cron-expand x)))
- (append x newres))
- (cons x res)))
- '()
- cron-str)) ;; (map common:cron-expand cron-str))
- (let ((cron-items (string-split cron-str))
- (slash-rx (regexp "(\\d+)/(\\d+)"))
- (comma-rx (regexp ".*,.*"))
- (max-vals '((min . 60)
- (hour . 24)
- (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
- (month . 12)
- (dayofweek . 7))))
- (if (< (length cron-items) 5) ;; bad spec
- cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it
- (let loop ((hed (car cron-items))
- (tal (cdr cron-items))
- (type 'min)
- (type-tal '(hour dayofmonth month dayofweek))
- (res '()))
- (regex-case
- hed
- (slash-rx ( _ base incr ) (let* ((basen (string->number base))
- (incrn (string->number incr))
- (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
- (new-list-crons (fold (lambda (x myres)
- (cons (conc (if (null? res)
- ""
- (conc (string-intersperse res " ") " "))
- x " " (string-intersperse tal " "))
- myres))
- '() expanded-vals)))
- ;; (print "new-list-crons: " new-list-crons)
- ;; (fold (lambda (x res)
- ;; (if (list? x)
- ;; (let ((newres (map common:cron-expand x)))
- ;; (append x newres))
- ;; (cons x res)))
- ;; '()
- (flatten (map common:cron-expand new-list-crons))))
- ;; (map common:cron-expand (map common:cron-expand new-list-crons))))
- (else (if (null? tal)
- cron-str
- (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))
-
-;;======================================================================
-;; given a cron string and the last time event was processed return #t to run or #f to not run
-;;
-;; min hour dayofmonth month dayofweek
-;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7
-;;
-;; #t => yes, run the job
-;; #f => no, do not run the job
-;;
-(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
- (let* ((cron-items (map string->number (string-split cron-str)))
- (now-seconds (or now-seconds-in (current-seconds)))
- (now-time (seconds->local-time now-seconds))
- (last-done-time (seconds->local-time last-done))
- (all-times (make-hash-table)))
- ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
- (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
- #f
- (match-let ((( cmin chour cdayofmonth cmonth cdayofweek)
- cron-items)
- ;; 0 1 2 3 4 5 6
- ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
- (vector->list now-time))
- ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
- (vector->list last-done-time)))
- ;; create all possible time slots
- ;; remove invalid slots due to (for example) day of week
- ;; get the start and end entries for the ref-seconds (current) time
- ;; if last-done > ref-seconds => this is an ERROR!
- ;; does the last-done time fall in the legit region?
- ;; yes => #f do not run again this command
- ;; no => #t ok to run the command
- (for-each ;; month
- (lambda (month)
- (for-each ;; dayofmonth
- (lambda (dom)
- (for-each
- (lambda (hr) ;; hour
- (for-each
- (lambda (minute) ;; minute
- (let ((copy-now (apply vector (vector->list now-time))))
- (vector-set! copy-now 0 0) ;; force seconds to zero
- (vector-set! copy-now 1 minute)
- (vector-set! copy-now 2 hr)
- (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced
- (vector-set! copy-now 4 month)
- (let* ((copy-now-secs (local-time->seconds copy-now))
- (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector
- (if (or (not cdayofweek)
- (equal? (vector-ref new-copy 6)
- cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
- (if (or (not cdayofmonth)
- (equal? (vector-ref new-copy 3)
- (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
- (hash-table-set! all-times copy-now-secs new-copy))))))
- (if cmin
- `(,cmin) ;; if given cmin, have to use it
- (list (- nmin 1) nmin (+ nmin 1))))) ;; minute
- (if chour
- `(,chour)
- (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
- (if cdayofmonth
- `(,cdayofmonth)
- (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
- (if cmonth
- `(,cmonth)
- (list (- nmonth 1) nmonth (+ nmonth 1))))
- (let ((before #f)
- (is-in #f))
- (for-each
- (lambda (moment)
- (if (and before
- (<= before now-seconds)
- (>= moment now-seconds))
- (begin
- ;; (print)
- ;; (print "Before: " (time->string (seconds->local-time before)))
- ;; (print "Now: " (time->string (seconds->local-time now-seconds)))
- ;; (print "After: " (time->string (seconds->local-time moment)))
- ;; (print "Last: " (time->string (seconds->local-time last-done)))
- (if (< last-done before)
- (set! is-in before))
- ))
- (set! before moment))
- (sort (hash-table-keys all-times) <))
- is-in)))))
-
-(define (common:extended-cron cron-str now-seconds-in last-done)
- (let ((expanded-cron (common:cron-expand cron-str)))
- (if (string? expanded-cron)
- (common:cron-event expanded-cron now-seconds-in last-done)
- (let loop ((hed (car expanded-cron))
- (tal (cdr expanded-cron)))
- (if (common:cron-event hed now-seconds-in last-done)
- #t
- (if (null? tal)
- #f
- (loop (car tal)(cdr tal))))))))
-
;;======================================================================
;; C O L O R S
;;======================================================================
(define (common:name->iup-color name)
Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -17,21 +17,37 @@
;; along with Megatest. If not, see .
;;======================================================================
(declare (unit commonmod))
+;; (declare (uses debugprint))
(use srfi-69)
(module commonmod
*
-(import scheme chicken data-structures extras files)
-(import (prefix sqlite3 sqlite3:)
- posix typed-records srfi-18 srfi-69
- md5 message-digest
- regex srfi-1)
+(import scheme
+ chicken
+
+ (prefix sqlite3 sqlite3:)
+ data-structures
+ extras
+ files
+ matchable
+ md5
+ message-digest
+ posix
+ regex
+ regex-case
+ srfi-1
+ srfi-18
+ srfi-69
+ typed-records
+
+ ;; debugprint
+ )
;;======================================================================
;; CONTENTS
;;
;; config file utils
@@ -161,10 +177,40 @@
(filter (lambda (x)
(not (string-match "^\\s*" x)))
val-list))
'())))
+(define (get-cpu-load)
+ (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines)))
+ (map string->number (string-split load-info))))
+
+(define *current-host-cores* #f)
+
+(define (get-current-host-cores)
+ (or *current-host-cores*
+ (let ((cpu-info (with-input-from-file "/proc/cpuinfo" read-lines)))
+ (let loop ((lines cpu-info))
+ (if (null? lines)
+ 1 ;; gotta be at least one!
+ (let* ((inl (car lines))
+ (tail (cdr lines))
+ (parts (string-split inl)))
+ (match parts
+ (("cpu" "cores" ":" num) (string->number num))
+ (else (loop tail)))))))))
+
+(define (number-of-processes-running processname)
+ (with-input-from-pipe
+ (conc "ps -def | egrep \""processname"\" |wc -l")
+ (lambda ()
+ (string->number (read-line)))))
+
+;; get the normalized (i.e. load / numcpus) for *this* host
+;;
+(define (get-normalized-cpu-load)
+ (/ (get-cpu-load)(get-current-host-cores)))
+
;;======================================================================
;; testsuite and area utilites
;;======================================================================
(define (get-testsuite-name toppath configdat)
@@ -208,18 +254,344 @@
(let ((adat (get-section cfgdat "areas")))
(map (lambda (entry)
`(,(car entry) .
,(val->alist (cadr entry))))
adat)))
+
+;;======================================================================
+;; time utils
+;;======================================================================
+
+(define (common:human-time)
+ (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
+
+;;======================================================================
+;; T I M E A N D D A T E
+;;======================================================================
+
+;;======================================================================
+;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
+(define (common:hms-string->seconds tstr)
+ (let ((parts (string-split-fields "\\w+" tstr))
+ (time-secs 0)
+ ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
+ (trx (regexp "(\\d+)([smhdMyw])")))
+ (for-each (lambda (part)
+ (let ((match (string-match trx part)))
+ (if match
+ (let ((val (string->number (cadr match)))
+ (unt (caddr match)))
+ (if val
+ (set! time-secs (+ time-secs (* val
+ (case (string->symbol unt)
+ ((s) 1)
+ ((m) 60) ;; minutes
+ ((h) 3600)
+ ((d) 86400)
+ ((w) 604800)
+ ((M) 2628000) ;; aproximately one month
+ ((y) 31536000)
+ (else #f))))))))))
+ parts)
+ time-secs))
+
+(define (seconds->hr-min-sec secs)
+ (let* ((hrs (quotient secs 3600))
+ (min (quotient (- secs (* hrs 3600)) 60))
+ (sec (- secs (* hrs 3600)(* min 60))))
+ (conc (if (> hrs 0)(conc hrs "hr ") "")
+ (if (> min 0)(conc min "m ") "")
+ sec "s")))
+
+(define (seconds->time-string sec)
+ (time->string
+ (seconds->local-time sec) "%H:%M:%S"))
+
+(define (seconds->work-week/day-time sec)
+ (time->string
+ (seconds->local-time sec) "ww%V.%u %H:%M"))
+
+(define (seconds->work-week/day sec)
+ (time->string
+ (seconds->local-time sec) "ww%V.%u"))
+
+(define (seconds->year-work-week/day sec)
+ (time->string
+ (seconds->local-time sec) "%yww%V.%w"))
+
+(define (seconds->year-work-week/day-time sec)
+ (time->string
+ (seconds->local-time sec) "%Yww%V.%w %H:%M"))
+
+(define (seconds->year-week/day-time sec)
+ (time->string
+ (seconds->local-time sec) "%Yw%V.%w %H:%M"))
+
+(define (seconds->quarter sec)
+ (case (string->number
+ (time->string
+ (seconds->local-time sec)
+ "%m"))
+ ((1 2 3) 1)
+ ((4 5 6) 2)
+ ((7 8 9) 3)
+ ((10 11 12) 4)
+ (else #f)))
+
+;;======================================================================
+;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
+;;
+(define (common:date-time->seconds datetime)
+ (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))
+
+;;======================================================================
+;; given span of seconds tstart to tend
+;; find start time to mark and mark delta
+;;
+(define (common:find-start-mark-and-mark-delta tstart tend)
+ (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
+ (result #f)
+ (min 60)
+ (hr (* 60 60))
+ (day (* 24 hr))
+ (yr (* 365 day)) ;; year
+ (mo (/ yr 12))
+ (wk (* day 7)))
+ (for-each
+ (lambda (max-blks)
+ (for-each
+ (lambda (span) ;; 5 2 1
+ (if (not result)
+ (for-each
+ (lambda (timeunit timesym) ;; year month day hr min sec
+ (if (not result)
+ (let* ((time-blk (* span timeunit))
+ (num-blks (quotient deltat time-blk)))
+ (if (and (> num-blks 4)(< num-blks max-blks))
+ (let ((first (* (quotient tstart time-blk) time-blk)))
+ (set! result (list span timeunit time-blk first timesym))
+ )))))
+ (list yr mo wk day hr min 1)
+ '( y mo w d h m s))))
+ (list 8 6 5 2 1)))
+ '(5 10 15 20 30 40 50 500))
+ (if values
+ (apply values result)
+ (values 0 day 1 0 'd))))
+
+;;======================================================================
+;; given x y lim return the cron expansion
+;;
+(define (common:expand-cron-slash x y lim)
+ (let loop ((curr x)
+ (res `()))
+ (if (< curr lim)
+ (loop (+ curr y) (cons curr res))
+ (reverse res))))
+
+;;======================================================================
+;; expand a complex cron string to a list of cron strings
+;;
+;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c
+;;
+;; NOTE: with flatten a lot of the crud below can be factored down.
+;;
+(define (common:cron-expand cron-str)
+ (if (list? cron-str)
+ (flatten
+ (fold (lambda (x res)
+ (if (list? x)
+ (let ((newres (map common:cron-expand x)))
+ (append x newres))
+ (cons x res)))
+ '()
+ cron-str)) ;; (map common:cron-expand cron-str))
+ (let ((cron-items (string-split cron-str))
+ (slash-rx (regexp "(\\d+)/(\\d+)"))
+ (comma-rx (regexp ".*,.*"))
+ (max-vals '((min . 60)
+ (hour . 24)
+ (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
+ (month . 12)
+ (dayofweek . 7))))
+ (if (< (length cron-items) 5) ;; bad spec
+ cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it
+ (let loop ((hed (car cron-items))
+ (tal (cdr cron-items))
+ (type 'min)
+ (type-tal '(hour dayofmonth month dayofweek))
+ (res '()))
+ (regex-case
+ hed
+ (slash-rx ( _ base incr ) (let* ((basen (string->number base))
+ (incrn (string->number incr))
+ (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
+ (new-list-crons (fold (lambda (x myres)
+ (cons (conc (if (null? res)
+ ""
+ (conc (string-intersperse res " ") " "))
+ x " " (string-intersperse tal " "))
+ myres))
+ '() expanded-vals)))
+ ;; (print "new-list-crons: " new-list-crons)
+ ;; (fold (lambda (x res)
+ ;; (if (list? x)
+ ;; (let ((newres (map common:cron-expand x)))
+ ;; (append x newres))
+ ;; (cons x res)))
+ ;; '()
+ (flatten (map common:cron-expand new-list-crons))))
+ ;; (map common:cron-expand (map common:cron-expand new-list-crons))))
+ (else (if (null? tal)
+ cron-str
+ (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))
+
+;;======================================================================
+;; given a cron string and the last time event was processed return #t to run or #f to not run
+;;
+;; min hour dayofmonth month dayofweek
+;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7
+;;
+;; #t => yes, run the job
+;; #f => no, do not run the job
+;;
+(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
+ (let* ((cron-items (map string->number (string-split cron-str)))
+ (now-seconds (or now-seconds-in (current-seconds)))
+ (now-time (seconds->local-time now-seconds))
+ (last-done-time (seconds->local-time last-done))
+ (all-times (make-hash-table)))
+ ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
+ (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
+ #f
+ (match-let ((( cmin chour cdayofmonth cmonth cdayofweek)
+ cron-items)
+ ;; 0 1 2 3 4 5 6
+ ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
+ (vector->list now-time))
+ ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
+ (vector->list last-done-time)))
+ ;; create all possible time slots
+ ;; remove invalid slots due to (for example) day of week
+ ;; get the start and end entries for the ref-seconds (current) time
+ ;; if last-done > ref-seconds => this is an ERROR!
+ ;; does the last-done time fall in the legit region?
+ ;; yes => #f do not run again this command
+ ;; no => #t ok to run the command
+ (for-each ;; month
+ (lambda (month)
+ (for-each ;; dayofmonth
+ (lambda (dom)
+ (for-each
+ (lambda (hr) ;; hour
+ (for-each
+ (lambda (minute) ;; minute
+ (let ((copy-now (apply vector (vector->list now-time))))
+ (vector-set! copy-now 0 0) ;; force seconds to zero
+ (vector-set! copy-now 1 minute)
+ (vector-set! copy-now 2 hr)
+ (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced
+ (vector-set! copy-now 4 month)
+ (let* ((copy-now-secs (local-time->seconds copy-now))
+ (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector
+ (if (or (not cdayofweek)
+ (equal? (vector-ref new-copy 6)
+ cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
+ (if (or (not cdayofmonth)
+ (equal? (vector-ref new-copy 3)
+ (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
+ (hash-table-set! all-times copy-now-secs new-copy))))))
+ (if cmin
+ `(,cmin) ;; if given cmin, have to use it
+ (list (- nmin 1) nmin (+ nmin 1))))) ;; minute
+ (if chour
+ `(,chour)
+ (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
+ (if cdayofmonth
+ `(,cdayofmonth)
+ (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
+ (if cmonth
+ `(,cmonth)
+ (list (- nmonth 1) nmonth (+ nmonth 1))))
+ (let ((before #f)
+ (is-in #f))
+ (for-each
+ (lambda (moment)
+ (if (and before
+ (<= before now-seconds)
+ (>= moment now-seconds))
+ (begin
+ ;; (print)
+ ;; (print "Before: " (time->string (seconds->local-time before)))
+ ;; (print "Now: " (time->string (seconds->local-time now-seconds)))
+ ;; (print "After: " (time->string (seconds->local-time moment)))
+ ;; (print "Last: " (time->string (seconds->local-time last-done)))
+ (if (< last-done before)
+ (set! is-in before))
+ ))
+ (set! before moment))
+ (sort (hash-table-keys all-times) <))
+ is-in)))))
+
+(define (common:extended-cron cron-str now-seconds-in last-done)
+ (let ((expanded-cron (common:cron-expand cron-str)))
+ (if (string? expanded-cron)
+ (common:cron-event expanded-cron now-seconds-in last-done)
+ (let loop ((hed (car expanded-cron))
+ (tal (cdr expanded-cron)))
+ (if (common:cron-event hed now-seconds-in last-done)
+ #t
+ (if (null? tal)
+ #f
+ (loop (car tal)(cdr tal))))))))
+
+
;;======================================================================
;; misc stuff
;;======================================================================
-;; (define (debug:print . params) #f)
-;; (define (debug:print-info . params) #f)
-;;
-;; (define (set-functions dbgp dbgpinfo)
-;; (set! debug:print dbgp)
-;; (set! debug:print-info dbgpinfo))
+(define (common:get-signature str)
+ (message-digest-string (md5-primitive) str))
+
+;;======================================================================
+;; hash of hashs
+;;======================================================================
+
+(define (db:hoh-set! dat key1 key2 val)
+ (let* ((subhash (hash-table-ref/default dat key1 #f)))
+ (if subhash
+ (hash-table-set! subhash key2 val)
+ (begin
+ (hash-table-set! dat key1 (make-hash-table))
+ (db:hoh-set! dat key1 key2 val)))))
+
+(define (db:hoh-get dat key1 key2)
+ (let* ((subhash (hash-table-ref/default dat key1 #f)))
+ (and subhash
+ (hash-table-ref/default subhash key2 #f))))
+
+;;======================================================================
+;; when called from a wrapper I need sometimes to find the calling
+;; wrapper, this is for dashboard to find the correct megatest.
+;;
+(define (common:find-local-megatest #!optional (progname "megatest"))
+ (let ((res (filter file-exists?
+ (map (lambda (updir)
+ (let* ((lm (car (argv)))
+ (dir (pathname-directory lm))
+ (exe (pathname-strip-directory lm)))
+ (conc (if dir (conc dir "/") "")
+ (case (string->symbol exe)
+ ((dboard) (conc updir progname))
+ ((mtest) (conc updir progname))
+ ((dashboard) progname)
+ (else exe)))))
+ '("../../" "../")))))
+ (if (null? res)
+ (begin
+ ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
+ progname)
+ (car res))))
)
Index: dashboard-context-menu.scm
==================================================================
--- dashboard-context-menu.scm
+++ dashboard-context-menu.scm
@@ -32,10 +32,11 @@
(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))
(declare (unit dashboard-context-menu))
(declare (uses common))
+(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
@@ -43,10 +44,12 @@
(declare (uses subrun))
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
+
+(import commonmod)
(define (dboard:launch-testpanel run-id test-id)
(let* ((dboardexe (common:find-local-megatest "dashboard"))
(cmd (conc dboardexe
" -test " run-id "," test-id
Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -31,17 +31,20 @@
(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))
(declare (unit dashboard-tests))
(declare (uses common))
+(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))
+
+(import commonmod)
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
@@ -459,12 +462,11 @@
;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
(let* ((db-path (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
- (dbstruct #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path: (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree")
- ;; local: #t))
+ (dbstruct #f) ;; NOT USED
(testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
(db-mod-time 0) ;; (file-modification-time db-path))
(last-update 0) ;; (current-seconds))
(request-update #t))
(if (not testdat)
Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -44,20 +44,28 @@
(declare (uses dcommon))
(declare (uses dashboard-context-menu))
(declare (uses vg))
(declare (uses subrun))
(declare (uses mt))
-(declare (uses dbfile))
+(declare (uses dbmod))
+;; (declare (uses dbmemmod))
+(declare (uses dbfile))
+
+(import dbmod dbfile)
(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "vg_records.scm")
+;; set some parameters here - these need to be put in something that can be loaded from other
+;; executables such as dashboard and mtutil
+;;
+(include "dashboard-transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)
(define help (conc
"Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version
" license GPL, Copyright (C) Matt Welland 2012-2017
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -22,10 +22,20 @@
;; Database access
;;======================================================================
;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc
+(declare (unit db))
+(declare (uses common))
+(declare (uses debugprint))
+(declare (uses dbmod))
+(declare (uses dbfile))
+(declare (uses keys))
+(declare (uses ods))
+(declare (uses client))
+(declare (uses mt))
+
(use (srfi 18)
extras
tcp
stack
(prefix sqlite3 sqlite3:)
@@ -44,28 +54,19 @@
z3
typed-records
matchable
files)
-(declare (unit db))
-(declare (uses common))
-(declare (uses dbmod))
-;; (declare (uses debugprint))
-(declare (uses dbfile))
-(declare (uses keys))
-(declare (uses ods))
-(declare (uses client))
-(declare (uses mt))
-
(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")
(define *number-of-writes* 0)
(define *number-non-write-queries* 0)
+(import debugprint)
(import dbmod)
(import dbfile)
;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
@@ -74,10 +75,17 @@
(state #f)
(status #f)
(count 0))
+(define (db:with-db dbstruct run-id r/w proc . params)
+ (case (rmt:transport-mode)
+ ((http)(dbfile:with-db dbstruct run-id r/w proc params))
+ ((tcp) (dbmod:with-db dbstruct run-id r/w proc params))
+ ((nfs) (dbmod:with-db dbstruct run-id r/w proc params))
+ (else (assert #f "FATAL: db:with-db called with non-existant transport mode"))))
+
;;======================================================================
;; hash of hashs
;;======================================================================
@@ -124,10 +132,83 @@
(assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
(let* ((tmpdir (common:get-db-tmp-area)))
(if (not *dbstruct-dbs*)
(dbfile:setup do-sync *toppath* tmpdir)
*dbstruct-dbs*)))
+
+;; moved from dbfile
+;;
+;; ADD run-id SUPPORT
+;;
+(define (db:create-all-triggers dbstruct)
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (db:create-triggers db))))
+
+(define (db:create-triggers db)
+ (for-each (lambda (key)
+ (sqlite3:execute db (cadr key)))
+ db:trigger-list))
+
+(define (db:drop-all-triggers dbstruct)
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (db:drop-triggers db))))
+
+(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
+ (let* ((incompleted '())
+ (oldlaunched '())
+ (toplevels '())
+ ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
+ (deadtime (or ovr-deadtime 72000))) ;; twenty hours
+ (db:with-db
+ dbstruct run-id #f
+ (lambda (dbdat db)
+
+ ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
+ ;;
+ ;; HOWEVER: this code in run:test seems to work fine
+ ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+ ;; (db:test-get-run_duration testdat)))
+ ;; 600)
+ ;; (db:delay-if-busy dbdat)
+ (sqlite3:for-each-row
+ (lambda (test-id run-dir uname testname item-path)
+ (if (and (equal? uname "n/a")
+ (equal? item-path "")) ;; this is a toplevel test
+ ;; what to do with toplevel? call rollup?
+ (begin
+ (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
+ ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
+ (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
+ (db:get-cache-stmth dbdat db
+ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
+ run-id deadtime)
+
+ ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
+ ;;
+ ;; (db:delay-if-busy dbdat)
+ (sqlite3:for-each-row
+ (lambda (test-id run-dir uname testname item-path)
+ (if (and (equal? uname "n/a")
+ (equal? item-path "")) ;; this is a toplevel test
+ ;; what to do with toplevel? call rollup?
+ (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+ (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
+ (db:get-cache-stmth dbdat db
+ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
+ run-id)
+
+ ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
+ (if (and (null? incompleted)
+ (null? oldlaunched)
+ (null? toplevels))
+ #f
+ #t)))))
+
;; looks up subdb and returns it, if not found then set up
;; and then return it.
;;
#;(define (db:get-db dbstruct run-id)
@@ -513,11 +594,11 @@
(dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
(keys (db:get-keys dbstruct))
(sync-durations (make-hash-table)))
;; kill servers
- (if (and killservers servers)(db:kill-servers))
+ (if killservers (db:kill-servers))
(if (not dbfiles)
(debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.megatest"))
(for-each
(lambda (srcfile)
@@ -583,12 +664,11 @@
(define (db:tmp->megatest.db-sync dbstruct run-id last-update)
(let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
(res '()))
(for-each
(lambda (subdb)
- (let* ((dbname (db:run-id->dbname run-id))
- (mtdb (dbr:subdb-mtdb subdb))
+ (let* ((mtdb (dbr:subdb-mtdb subdb))
(tmpdb (db:get-subdb dbstruct run-id))
(refndb (dbr:subdb-refndb subdb))
(newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
;; BUG: verify this is really needed
@@ -1392,29 +1472,37 @@
;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================
-(define (db:no-sync-db db-in)
- (if db-in
- db-in
- (if *no-sync-db*
- *no-sync-db*
- (begin
- (mutex-lock! *db-access-mutex*)
- (let ((dbpath (common:get-db-tmp-area))
- (db (dbfile:open-no-sync-db dbpath)))
- (set! *no-sync-db* db)
- (mutex-unlock! *db-access-mutex*)
- db)))))
+(define (db:get-dbsync-path)
+ (case (rmt:transport-mode)
+ ((http)(common:get-db-tmp-area))
+ ((tcp) (conc *toppath*"/.megatest"))
+ ((nfs) (conc *toppath*"/.megatest"))
+ (else "/tmp/dunno-this-gonna-exist")))
+
+ ;; (define (db:no-sync-db db-in)
+ ;; (if db-in
+ ;; db-in
+ ;; (if *no-sync-db*
+ ;; *no-sync-db*
+ ;; (begin
+ ;; (mutex-lock! *db-access-mutex*)
+ ;; (let ((dbpath (db:get-dbsync-path))
+ ;; (db (dbfile:open-no-sync-db dbpath)))
+ ;; (assert (sqlite3:database? db) "FATAL: db:no-sync-db failed to open a database")
+ ;; (set! *no-sync-db* db)
+ ;; (mutex-unlock! *db-access-mutex*)
+ ;; db)))))
(define (with-no-sync-db proc)
- (let* ((db (db:no-sync-db *no-sync-db*)))
+ (let* ((db (db:open-no-sync-db)))
(proc db)))
(define (db:open-no-sync-db)
- (dbfile:open-no-sync-db (db:dbfile-path)))
+ (dbfile:open-no-sync-db (db:get-dbsync-path)))
(define (db:no-sync-close-db db stmt-cache)
(db:safely-close-sqlite3-db db stmt-cache))
@@ -1953,19 +2041,20 @@
(debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
(vector header
(reverse
- (db:with-db dbstruct #f #f ;; reads db, does not write to it.
- (lambda (dbdat db)
- (sqlite3:fold-row
- (lambda (res . r)
- (cons (list->vector r) res))
- '()
- db
- qry-str
- runnamepatt)))))))
+ (db:with-db
+ dbstruct #f #f ;; reads db, does not write to it.
+ (lambda (dbdat db)
+ (sqlite3:fold-row
+ (lambda (res . r)
+ (cons (list->vector r) res))
+ '()
+ db
+ qry-str
+ runnamepatt)))))))
;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;; this is inconsistent with get-runs but it makes some sense.
;;
@@ -4666,10 +4755,13 @@
(begin
(sqlite3:interrupt! db)
(sqlite3:finalize! db #t)
;; (vector-set! *task-db* 0 #f)
(set! *task-db* #f)))))
+ (if (and *no-sync-db*
+ (sqlite3:database? *no-sync-db*))
+ (sqlite3:finalize! *no-sync-db* #t))
(if (and (not (args:get-arg "-server"))
*runremote*)
(begin
(debug:print-info 0 *default-log-port* "Closing all client connections...")
(http-transport:close-connections *runremote*)
Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -41,11 +41,12 @@
commonmod
;; debugprint
)
(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
-(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest
+(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest
+(define dbfile:testsuite-name (make-parameter #f))
;;======================================================================
;; R E C O R D S
;;======================================================================
@@ -56,10 +57,19 @@
(areapath #f)
(homehost #f)
(tmppath #f)
(read-only #f)
(subdbs (make-hash-table))
+ ;;
+ ;; for the inmem approach (see dbmod.scm)
+ ;; this is one db per server
+ (inmem #f) ;; handle for the in memory copy
+ (dbfile #f) ;; path to the db file on disk
+ (ondiskdb #f) ;; handle for the on-disk file
+ (dbdat #f) ;; create a dbdat for the downstream calls such as db:with-db
+ (last-update 0)
+ (sync-proc #f)
)
;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
@@ -94,10 +104,11 @@
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access* #t)
(define *db-last-sync* 0) ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync*
+(define *db-last-access* (current-seconds))
(define (db:generic-error-printout exn . message)
(print-call-chain (current-error-port))
(apply dbfile:print-err message)
(dbfile:print-err
@@ -159,39 +170,10 @@
)
#f
)
)
-;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
-;; ;;
-;; (define (db:setup-db dbstruct areapath run-id)
-;; (let* ((dbname (db:run-id->dbname run-id))
-;; (dbstruct (hash-table-ref/default dbstructs dbname #f)))
-;; (if dbstruct
-;; dbstruct
-;; (let* ((dbstruct-new (make-dbr:dbstruct)))
-;; (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t)
-;; (hash-table-set! dbstructs dbname dbstruct-new)
-;; dbstruct-new))))
-
-;; ; Returns the dbdat for a particular dbfile inside the area
-;; ;;
-;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile)
-;; (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))
-;;
-;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
-;; (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
-;;
-;; (define (db:run-id->first-num run-id)
-;; (let* ((s (number->string run-id))
-;; (l (string-length s)))
-;; (substring s (- l 1) l)))
-
-;; 1234 => 4/1234.db
-;; #f => 0/main.db
-;; (abandoned the idea of num/db)
-;;
(define (dbfile:run-id->path apath run-id)
(conc apath"/"(dbfile:run-id->dbname run-id)))
(define (db:dbname->path apath dbname)
(conc apath"/"dbname))
@@ -215,14 +197,12 @@
(cond
(*dbstruct-dbs*
(dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
*dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
(else
- (let* ((dbstruct (make-dbr:dbstruct)))
+ (let* ((dbstruct (make-dbr:dbstruct areapath: areapath tmppath: tmppath)))
(set! *dbstruct-dbs* dbstruct)
- (dbr:dbstruct-areapath-set! dbstruct areapath)
- (dbr:dbstruct-tmppath-set! dbstruct tmppath)
dbstruct))))
(define (dbfile:get-subdb dbstruct run-id)
(let* ((dbfname (dbfile:run-id->dbname run-id)))
(hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))
@@ -415,11 +395,10 @@
"cp "backupfname" "fname)))
(dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
" "cmd)
(system cmd)))
-
(define (dbfile:open-no-sync-db dbpath)
(if *no-sync-db*
*no-sync-db*
(begin
(if (not (file-exists? dbpath))
@@ -429,13 +408,20 @@
(init-proc (lambda (db)
(if (not db-exists)
(begin
(sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
)))
- (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname)))
- ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
- ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database
+ (on-tmp (equal? (car (string-split dbpath "/")) "tmp"))
+ (db (if on-tmp
+ (dbfile:cautious-open-database dbname init-proc 0 "WAL")
+ (dbfile:cautious-open-database dbname init-proc 0 #f)
+ ;; (sqlite3:open-database dbname)
+ )))
+ (if on-tmp ;; done in cautious-open-database
+ (begin
+ (sqlite3:execute db "PRAGMA synchronous = 0;")
+ (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))
(set! *no-sync-db* db)
db))))
(define (db:no-sync-set db var val)
(sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
@@ -570,11 +556,11 @@
(tmpdb (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f))
(start-t (current-seconds)))
(mutex-lock! *db-multi-sync-mutex*)
(let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
(mutex-unlock! *db-multi-sync-mutex*)
- (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
+ (db:sync-tables (db:sync-all-tables-list keys) update_info tmpdb mtdb))
(mutex-lock! *db-multi-sync-mutex*)
(set! *db-last-sync* start-t)
(set! *db-last-access* start-t)
(mutex-unlock! *db-multi-sync-mutex*)
(dbfile:add-dbdat dbstruct run-id tmpdb)
@@ -633,12 +619,12 @@
'("type" #f)
'("last_update" #f))))
;; needs db to get keys, this is for syncing all tables
;;
-(define (db:sync-main-list dbstruct keys)
- (let ((keys keys)) ;; (db:get-keys dbstruct)))
+(define (db:sync-main-list keys)
+ (let ((keys keys))
(list
(list "keys"
'("id" #f)
'("fieldname" #f)
'("fieldtype" #f))
@@ -689,12 +675,12 @@
'("params" #f)
'("creation_time" #f)
'("execution_time" #f))
)))
-(define (db:sync-all-tables-list dbstruct keys)
- (append (db:sync-main-list dbstruct keys)
+(define (db:sync-all-tables-list keys)
+ (append (db:sync-main-list keys)
db:sync-tests-only))
;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are dbdat's
;;
@@ -945,30 +931,10 @@
FOR EACH ROW
BEGIN
UPDATE test_data SET last_update=(strftime('%s','now'))
WHERE id=old.id;
END;" )))
-;;
-;; ADD run-id SUPPORT
-;;
-(define (db:create-all-triggers dbstruct)
- (db:with-db
- dbstruct #f #f
- (lambda (dbdat db)
- (db:create-triggers db))))
-
-(define (db:create-triggers db)
- (for-each (lambda (key)
- (sqlite3:execute db (cadr key)))
- db:trigger-list))
-
-(define (db:drop-all-triggers dbstruct)
- (db:with-db
- dbstruct #f #f
- (lambda (dbdat db)
- (db:drop-triggers db))))
-
(define (db:is-trigger-dropped db tbl-name)
(let* ((trigger-name (if (equal? tbl-name "test_steps")
"update_teststeps_trigger"
(conc "update_" tbl-name "_trigger")))
(res #f))
@@ -1012,10 +978,14 @@
;; call with dbinit=db:initialize-main-db
;;
(define (db:open-db dbstruct run-id dbinit)
;; (mutex-lock! *db-open-mutex*)
(let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
+ #;(case (rmt:transport-mode)
+ ((http) (dbfile:open-db dbstruct run-id dbinit))
+ ((tcp) (dbmod:open-db dbstruct run-id dbinit))
+ (else (assert #f "FATAL: rmt:transport-node not correct value"(rmt:transport-mode))))
(set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
;; (mutex-unlock! *db-open-mutex*)
dbdat))
(define dbfile:db-init-proc (make-parameter #f))
@@ -1078,11 +1048,11 @@
(define no-condition-db-with-db (make-parameter #t))
;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
-(define (db:with-db dbstruct run-id r/w proc . params)
+(define (dbfile:with-db dbstruct run-id r/w proc params)
(assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
(assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
(let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
(have-struct (dbr:dbstruct? dbstruct))
(dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly
@@ -1268,58 +1238,8 @@
(let* ((newstmth (sqlite3:prepare db stmt)))
;; (db:hoh-set! stmt-cache db stmt newstmth)
(hash-table-set! stmt-cache stmt newstmth)
newstmth))))
-(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
- (let* ((incompleted '())
- (oldlaunched '())
- (toplevels '())
- ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
- (deadtime (or ovr-deadtime 72000))) ;; twenty hours
- (db:with-db
- dbstruct run-id #f
- (lambda (dbdat db)
-
- ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
- ;;
- ;; HOWEVER: this code in run:test seems to work fine
- ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
- ;; (db:test-get-run_duration testdat)))
- ;; 600)
- ;; (db:delay-if-busy dbdat)
- (sqlite3:for-each-row
- (lambda (test-id run-dir uname testname item-path)
- (if (and (equal? uname "n/a")
- (equal? item-path "")) ;; this is a toplevel test
- ;; what to do with toplevel? call rollup?
- (begin
- (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
- ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
- (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
- (db:get-cache-stmth dbdat db
- "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
- run-id deadtime)
-
- ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
- ;;
- ;; (db:delay-if-busy dbdat)
- (sqlite3:for-each-row
- (lambda (test-id run-dir uname testname item-path)
- (if (and (equal? uname "n/a")
- (equal? item-path "")) ;; this is a toplevel test
- ;; what to do with toplevel? call rollup?
- (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
- (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
- (db:get-cache-stmth dbdat db
- "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
- run-id)
-
- ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
- (if (and (null? incompleted)
- (null? oldlaunched)
- (null? toplevels))
- #f
- #t)))))
)
Index: dbmod.scm
==================================================================
--- dbmod.scm
+++ dbmod.scm
@@ -17,40 +17,301 @@
;; along with Megatest. If not, see .
;;======================================================================
(declare (unit dbmod))
+(declare (uses dbfile))
+(declare (uses commonmod))
+(declare (uses debugprint))
(module dbmod
*
-(import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:)
- posix typed-records srfi-18
- srfi-69)
-
-(define (db:run-id->dbname run-id)
- (cond
- ((number? run-id)(conc run-id ".db"))
- ((not run-id) "main.db")
- (else run-id)))
-
-
-;;======================================================================
-;; hash of hashs
-;;======================================================================
-
-
-(define (db:hoh-set! dat key1 key2 val)
- (let* ((subhash (hash-table-ref/default dat key1 #f)))
- (if subhash
- (hash-table-set! subhash key2 val)
- (begin
- (hash-table-set! dat key1 (make-hash-table))
- (db:hoh-set! dat key1 key2 val)))))
-
-(define (db:hoh-get dat key1 key2)
- (let* ((subhash (hash-table-ref/default dat key1 #f)))
- (and subhash
- (hash-table-ref/default subhash key2 #f))))
+(import scheme
+ chicken
+ data-structures
+ extras
+
+ (prefix sqlite3 sqlite3:)
+ posix
+ typed-records
+ srfi-1
+ srfi-18
+ srfi-69
+
+ commonmod
+ dbfile
+ debugprint
+ )
+
+;; NOTE: This returns only the name "1.db", "main.db", not the path
+;;
+(define (dbmod:run-id->dbfname run-id)
+ (conc (dbfile:run-id->dbnum run-id)".db"))
+
+(define (dbmod:get-dbdir dbstruct)
+ (let* ((areapath (dbr:dbstruct-areapath dbstruct))
+ (dbdir (conc areapath"/.megatest")))
+ (if (and (file-write-access? areapath)
+ (not (file-exists? dbdir)))
+ (create-directory dbdir))
+ dbdir))
+
+(define (dbmod:run-id->full-dbfname dbstruct run-id)
+ (conc (dbmod:get-dbdir dbstruct run-id)"/"(dbmod:run-id->dbfname run-id)))
+
+;;======================================================================
+;; Read-only inmem cached direct from disk method
+;;======================================================================
+
+(define *dbmod:nfs-db-handles* (make-hash-table)) ;; dbfname -> dbstruct
+
+(define (dbmod:nfs-get-dbstruct run-id keys init-proc areapath)
+ (assert areapath "FATAL: dbmod:nfs-get-dbstruct called without areapath set.")
+ (let* ((dbfname (dbmod:run-id->dbfname run-id))
+ (dbstruct (hash-table-ref/default *dbmod:nfs-db-handles* dbfname #f)))
+ (if dbstruct
+ (let* ((last-update (dbr:dbstruct-last-update dbstruct))
+ (curr-secs (current-seconds)))
+ (if (> (- curr-secs last-update) 2)
+ (begin
+ ((dbr:dbstruct-sync-proc dbstruct) last-update)
+ (dbr:dbstruct-last-update-set! dbstruct curr-secs)))
+ dbstruct)
+ (let* ((newdbstruct (dbmod:open-dbmoddb areapath run-id dbfname init-proc keys syncdir: 'fromdisk)))
+ (hash-table-set! *dbmod:nfs-db-handles* dbfname newdbstruct)
+ newdbstruct))))
+
+;;======================================================================
+;; The inmem one-db file per server method goes in here
+;;======================================================================
+
+(define (dbmod:with-db dbstruct run-id r/w proc params)
+ (let* ((dbdat (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
+ (dbh (dbr:dbdat-dbh dbdat))
+ (dbfile (dbr:dbdat-dbfile dbdat)))
+ (apply proc dbdat dbh params)))
+
+(define (dbmod:open-inmem-db initproc)
+ (let* ((db (sqlite3:open-database ":memory:"))
+ (handler (sqlite3:make-busy-timeout 3600)))
+ (sqlite3:set-busy-handler! db handler)
+ (initproc db)
+ db))
+
+(define (dbmod:open-db dbstruct run-id dbinit)
+ (or (dbr:dbstruct-dbdat dbstruct)
+ (let* ((dbdat (make-dbr:dbdat
+ dbfile: (dbr:dbstruct-dbfile dbstruct)
+ dbh: (dbr:dbstruct-inmem dbstruct)
+ )))
+ (dbr:dbstruct-dbdat-set! dbstruct dbdat)
+ dbdat)))
+
+;; Open the inmem db and the on-disk db
+;; populate the inmem db with data
+;;
+;; Updates fields in dbstruct
+;; Returns dbstruct
+;;
+;; * This routine creates the db if not found
+;; * Probably can get rid of the dbstruct-in
+;;
+(define (dbmod:open-dbmoddb areapath run-id dbfname-in init-proc keys
+ #!key (dbstruct-in #f)
+ (syncdir 'todisk))
+ (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
+ (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))
+ (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept
+ (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
+ (dbexists (file-exists? dbfullname))
+ (inmem (dbmod:open-inmem-db init-proc))
+ (write-access (file-write-access? dbpath))
+ (db (dbfile:with-simple-file-lock
+ (conc dbfullname".lock")
+ (lambda ()
+ (let* ((db (sqlite3:open-database dbfullname))
+ (handler (sqlite3:make-busy-timeout 136000)))
+ (sqlite3:set-busy-handler! db handler)
+ (if write-access
+ (init-proc db))
+ db))))
+ (tables (db:sync-all-tables-list keys)))
+ (dbr:dbstruct-inmem-set! dbstruct inmem)
+ (dbr:dbstruct-ondiskdb-set! dbstruct db)
+ (dbr:dbstruct-dbfile-set! dbstruct dbfullname)
+ (dbr:dbstruct-sync-proc-set! dbstruct
+ (lambda (last-update)
+ (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
+ (dbmod:sync-tables tables last-update inmem db)
+ (dbmod:sync-tables tables last-update db inmem))))
+ (dbmod:sync-tables tables #f db inmem)
+ (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
+ dbstruct))
+
+(define (dbmod:close-db dbstruct)
+ ;; do final sync to disk file
+ ;; (do-sync ...)
+ (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct)))
+
+;;======================================================================
+;; Sync db
+;;======================================================================
+
+(define (dbmod:calc-use-last-update has-last-update fields last-update)
+ (cond
+ ((and has-last-update
+ (member "last_update" fields))
+ #t) ;; if given a number, just use it for all fields
+ ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+ ((and (pair? last-update)
+ (member (car last-update) ;; last-update field name
+ (map car fields)))
+ #t)
+ ((and last-update (not (pair? last-update)) (not (number? last-update)))
+ (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
+ #f)
+ (else
+ #f)))
+
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;; dbs are sqlite3 db handles
+;;
+;; if last-update specified ("field-name" . time-in-seconds)
+;; then sync only records where field-name >= time-in-seconds
+;; IFF field-name exists
+;;
+;; Use (db:sync-all-tables-list keys) to get the tbls input
+;;
+(define (dbmod:sync-tables tbls last-update fromdb todb)
+ (let ((stmts (make-hash-table)) ;; table-field => stmt
+ (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 ))
+ (numrecs (make-hash-table))
+ (start-time (current-milliseconds))
+ (tot-count 0))
+ (for-each ;; table
+ (lambda (tabledat)
+ (let* ((tablename (car tabledat))
+ (fields (cdr tabledat))
+ (has-last-update (member "last_update" fields))
+ (use-last-update (dbmod:calc-use-last-update has-last-update fields last-update))
+ (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+ (if (number? last-update)
+ last-update
+ (cdr last-update))
+ #f))
+ (last-update-field (if use-last-update
+ (if (number? last-update)
+ "last_update"
+ (car last-update))
+ #f))
+ (num-fields (length fields))
+ (field->num (make-hash-table))
+ (num->field (apply vector (map car fields))) ;; BBHERE
+ (full-sel (conc "SELECT " (string-intersperse (map car fields) ",")
+ " FROM " tablename (if use-last-update ;; apply last-update criteria
+ (conc " WHERE " last-update-field " >= " last-update-value)
+ "")
+ ";"))
+ (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
+ " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
+ (fromdat '())
+ (fromdats '())
+ (totrecords 0)
+ (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
+ (todat (make-hash-table))
+ (count 0)
+ (field-names (map car fields)))
+
+ ;; set up the field->num table
+ (for-each
+ (lambda (field)
+ (hash-table-set! field->num field count)
+ (set! count (+ count 1)))
+ fields)
+
+ ;; read the source table
+ ;; store a list of all rows in the table in fromdat, up to batch-len.
+ ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
+ (sqlite3:for-each-row
+ (lambda (a . b)
+ (set! fromdat (cons (apply vector a b) fromdat))
+ (if (> (length fromdat) batch-len)
+ (begin
+ (set! fromdats (cons fromdat fromdats))
+ (set! fromdat '())
+ (set! totrecords (+ totrecords 1)))))
+ fromdb
+ full-sel)
+
+ ;; Count less than batch-len as a record
+ (if (> (length fromdat) 0)
+ (set! totrecords (+ totrecords 1)))
+
+ ;; tack on remaining records in fromdat
+ (if (not (null? fromdat))
+ (set! fromdats (cons fromdat fromdats)))
+
+ (sqlite3:for-each-row
+ (lambda (a . b)
+ (hash-table-set! todat a (apply vector a b)))
+ todb
+ full-sel)
+
+ ;; first pass implementation, just insert all changed rows
+ (let* ((db todb)
+ (drp-trigger (if (member "last_update" field-names)
+ (db:drop-trigger db tablename)
+ #f))
+ (has-last-update (member "last_update" field-names))
+ (is-trigger-dropped (if has-last-update
+ (db:is-trigger-dropped db tablename)
+ #f))
+ (stmth (sqlite3:prepare db full-ins))
+ (changed-rows 0))
+ (for-each
+ (lambda (fromdat-lst)
+ (sqlite3:with-transaction
+ db
+ (lambda ()
+ (for-each ;;
+ (lambda (fromrow)
+ (let* ((a (vector-ref fromrow 0))
+ (curr (hash-table-ref/default todat a #f))
+ (same #t))
+ (let loop ((i 0))
+ (if (or (not curr)
+ (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
+ (set! same #f))
+ (if (and same
+ (< i (- num-fields 1)))
+ (loop (+ i 1))))
+ (if (not same)
+ (begin
+ (apply sqlite3:execute stmth (vector->list fromrow))
+ (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
+ (set! changed-rows (+ changed-rows 1))))))
+ fromdat-lst))))
+ fromdats)
+
+ (sqlite3:finalize! stmth)
+ (if (member "last_update" field-names)
+ (db:create-trigger db tablename)))))
+ tbls)
+ (let* ((runtime (- (current-milliseconds) start-time))
+ (should-print (or ;; (debug:debug-mode 12)
+ (common:low-noise-print 120 "db sync")
+ (> runtime 500)))) ;; low and high sync times treated as separate.
+ (for-each
+ (lambda (dat)
+ (let ((tblname (car dat))
+ (count (cdr dat)))
+ (set! tot-count (+ tot-count count))))
+ (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
+ tot-count))
+
+;;======================================================================
+;; Moved from dbfile
+;;======================================================================
+
)
Index: dcommon.scm
==================================================================
--- dcommon.scm
+++ dcommon.scm
@@ -708,11 +708,13 @@
#:numlin-visible 5
))
(colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
(updater (lambda ()
(if (dashboard:monitor-changed? commondat tabdat)
- (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10)))
+ (let ((servers (case (rmt:transport-mode)
+ ((http)(server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath* limit: 10)))
+ (else '()))))
(iup:attribute-set! servers-matrix "NUMLIN" (length servers))
;; (set! colnum 0)
;; (for-each (lambda (colname)
;; ;; (print "colnum: " colnum " colname: " colname)
;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -33,17 +33,19 @@
(declare (uses common))
(declare (uses commonmod))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))
+(declare (uses dbfile))
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")
-(import commonmod)
+(import commonmod
+ dbfile)
;;======================================================================
;; ezsteps
;;======================================================================
@@ -1143,11 +1145,14 @@
(setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
(begin
(debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
(set! *toppath* #f) ;; force it to be false so we return #f
#f))
-
+
+ ;; needed by various transport and db modules
+ (dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*))
+
;; one more attempt to cache the configs for future reading
(let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
(mtcachef (car cachefiles))
(rccachef (cdr cachefiles)))
@@ -1579,11 +1584,11 @@
;; (list 'serverinf *server-info*)
#;(list 'homehost (let* ((hhdat (server:get-homehost)))
(if hhdat
(car hhdat)
#f)))
- (list 'serverurl (if *runremote*
+ #;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED
(remote-server-url *runremote*)
#f)) ;;
(list 'areaname (common:get-testsuite-name))
(list 'toppath *toppath*)
(list 'work-area work-area)
Index: margs.scm
==================================================================
--- margs.scm
+++ margs.scm
@@ -23,10 +23,20 @@
(define (args:get-arg arg . default)
(if (null? default)
(hash-table-ref/default args:arg-hash arg #f)
(hash-table-ref/default args:arg-hash arg (car default))))
+
+;; get an arg as a number
+(define (args:get-arg-number arg . default)
+ (let* ((val-str (args:get-arg arg))
+ (val (if val-str (string->number val-str) #f)))
+ (if val
+ val
+ (if (null? default)
+ #f
+ default))))
(define (args:any? . args)
(not (null? (filter (lambda (x) x)
(map args:get-arg args)))))
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -23,10 +23,18 @@
(define (toplevel-command . a) #f)
(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses margs))
+(declare (uses mtargs))
+(declare (uses mtargs.import))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
+(declare (uses mtargs))
+(declare (uses mtargs.import))
+(declare (uses debugprint))
+(declare (uses debugprint.import))
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
@@ -41,27 +49,29 @@
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses db))
+(declare (uses dbfile))
+(declare (uses dbfile.import))
(declare (uses dbmod))
(declare (uses dbmod.import))
-(declare (uses commonmod))
-(declare (uses commonmod.import))
-(declare (uses dbfile))
-(declare (uses dbfile.import))
+(declare (uses tcp-transportmod))
+(declare (uses tcp-transportmod.import))
;; (declare (uses debugprint))
;; (declare (uses debugprint.import))
-;; (declare (uses mtargs))
-;; (declare (uses mtargs.import))
;; (declare (uses ftail))
;; (import ftail)
-(import dbmod
+(import mtargs
+ debugprint
+ dbmod
commonmod
- dbfile)
+ dbfile
+ tcp-transportmod
+ )
(define *db* #f) ;; this is only for the repl, do not use in general!!!!
(include "common_records.scm")
(include "key_records.scm")
@@ -69,11 +79,11 @@
(include "run_records.scm")
(include "megatest-fossil-hash.scm")
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
readline apropos json http-client directory-utils typed-records
- http-client srfi-18 extras format)
+ http-client srfi-18 extras format tcp-server tcp)
;; Added for csv stuff - will be removed
;;
(use sparse-vectors)
@@ -80,10 +90,14 @@
(require-library mutils)
(define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file
(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file
+;; set some parameters here - these need to be put in something that can be loaded from other
+;; executables such as dashboard and mtutil
+;;
+(include "transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)
;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
@@ -373,10 +387,11 @@
"-load" ;; load and exectute a scheme file
"-section"
"-var"
"-dumpmode"
"-run-id"
+ "-db"
"-ping"
"-refdb2dat"
"-o"
"-log"
"-sync-log"
@@ -585,16 +600,16 @@
;; where (launch:setup) returns #f?
;;
(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
(handle-exceptions
exn
- (begin
- (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
- )
+ (begin
+ (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
(let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
+ (dbname (args:get-arg "-db")) ;; for the server logfile name
(logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
- (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log")))
+ (conc tl "/logs/server-"(or dbname "unk")"-"(current-process-id) "-" (get-host-name) ".log")))
(oup (open-logfile logf)))
(if (not (args:get-arg "-log"))
(hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
(debug:print-info 0 *default-log-port* "Sending log output to " logf)
(set! *default-log-port* oup))))
@@ -921,13 +936,24 @@
;;======================================================================
;; Server? Start up here.
;;
(if (args:get-arg "-server")
- (let ((tl (launch:setup)))
- ;; (server:launch 0 'http)
- (http-transport:launch)
+ (let* (;; (run-id (args:get-arg "-run-id"))
+ (dbfname (args:get-arg "-db"))
+ (tl (launch:setup))
+ (keys (keys:config-get-fields *configdat*)))
+ (case (rmt:transport-mode)
+ ((http)(http-transport:launch))
+ ((tcp)
+ (debug:print 0 *default-log-port* "INFO: Running using tcp method.")
+ (if dbfname
+ (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys)
+ (begin
+ (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.")
+ (exit 1))))
+ (else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode))))
(set! *didsomething* #t)))
;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
@@ -940,20 +966,26 @@
(args:get-arg "-kill-servers"))
(let ((tl (launch:setup)))
(if tl ;; all roads from here exit
(let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
(fmtstr "~33a~22a~20a~20a~8a\n"))
- (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
- (format #t fmtstr "==" "=========" "=========" "========" "=====")
- (for-each ;; ( mod-time host port start-time pid )
+ (if (not servers)
+ (begin
+ (debug:print-info 1 *default-log-port* "No servers found")
+ (exit)
+ )
+ )
+ (format #t fmtstr "PID" "host:port" "age (hms)" "Last mod" "State")
+ (format #t fmtstr "===" "=========" "=========" "========" "=====")
+ (for-each ;; (ip-addr port? mod-time host port start-time pid )
(lambda (server)
- (let* ((mtm (any->number (car server)))
+ (let* ((mtm (any->number (caddr server)))
(mod (if mtm (- (current-seconds) mtm) "unk"))
- (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
- (url (conc (cadr server) ":" (caddr server)))
+ (age (- (current-seconds)(or (any->number mtm) (current-seconds))))
(pid (list-ref server 4))
- (alv (if (number? mod)(< mod 10) #f)))
+ (url (conc (car server) ":" (cadr server)))
+ (alv (if (number? mod)(< mod 360) #f)))
(format #t
fmtstr
pid
url
(seconds->hr-min-sec age)
@@ -966,11 +998,10 @@
(server:kill server)))))
(sort servers (lambda (a b)
(let ((ma (or (any->number (car a)) 9e9))
(mb (or (any->number (car b)) 9e9)))
(> ma mb)))))
- ;; (debug:print-info 1 *default-log-port* "Done with listservers")
(set! *didsomething* #t)
(exit))
(exit))))
;; must do, would have to add checks to many/all calls below
@@ -1370,12 +1401,11 @@
;; IDEA: megatest list -runname blah% ...
;;
(if (or (args:get-arg "-list-runs")
(args:get-arg "-list-db-targets"))
(if (launch:setup)
- (let* (;; (dbstruct (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local")))
- (runpatt (args:get-arg "-list-runs"))
+ (let* ((runpatt (args:get-arg "-list-runs"))
(access-mode (db:get-access-mode))
(testpatt (common:args-get-testpatt #f))
;; (if (args:get-arg "-testpatt")
;; (args:get-arg "-testpatt")
;; "%"))
@@ -2041,11 +2071,11 @@
(if (args:get-arg "-extract-ods")
(general-run-call
"-extract-ods"
"Make ods spreadsheet"
(lambda (target runname keys keyvals)
- (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: #t))
+ (let ((dbstruct (make-dbr:dbstruct areapath: *toppath* local: #t))
(outputfile (args:get-arg "-extract-ods"))
(runspatt (or (args:get-arg "-runname")(args:get-arg ":runname")))
(pathmod (args:get-arg "-pathmod")))
;; (keyvalalist (keys->alist keys "%")))
(debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals)
@@ -2308,14 +2338,16 @@
(begin
(if (not (launch:setup))
(begin
(debug:print 0 *default-log-port* "Failed to setup, exiting")
(exit 1)))
- (if (not (server:choose-server *toppath* 'home?))
- (begin
- (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db")
- (exit 1)))
+
+;; (if (not (server:choose-server *toppath* 'home?))
+;; (begin
+;; (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db")
+;; (exit 1)))
+
(let ((dbstructs (db:setup #f)))
(common:cleanup-db dbstructs))
(set! *didsomething* #t)))
(if (args:get-arg "-mark-incompletes")
@@ -2370,11 +2402,13 @@
(if (or (getenv "MT_RUNSCRIPT")
(args:get-arg "-repl")
(args:get-arg "-load"))
(let* ((toppath (launch:setup))
(dbstructs (if (and toppath
- (server:choose-server toppath 'home?))
+ ;; NOTE: server:choose-server is starting a server
+ ;; either add equivalent for tcp mode or ????
+ #;(server:choose-server toppath 'home?))
(db:setup #t)
#f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
(if *toppath*
(cond
((getenv "MT_RUNSCRIPT")
Index: mtargs/mtargs.scm
==================================================================
--- mtargs/mtargs.scm
+++ mtargs/mtargs.scm
@@ -18,16 +18,17 @@
(module mtargs
(
arg-hash
get-arg
+ get-arg-number
get-arg-from
get-args
usage
print-args
any-defined?
- )
+ )
(import scheme) ;; gives us cond-expand in chicken-4
(cond-expand
(chicken-5
@@ -42,10 +43,20 @@
(define (get-arg arg . default)
(if (null? default)
(hash-table-ref/default arg-hash arg #f)
(hash-table-ref/default arg-hash arg (car default))))
+
+;; get an arg as a number
+(define (get-arg-number arg . default)
+ (let* ((val-str (get-arg arg))
+ (val (if val-str (string->number val-str) #f)))
+ (if val
+ val
+ (if (null? default)
+ #f
+ default))))
(define (any-defined? . args)
(not (null? (filter (lambda (x) x)
(map get-arg args)))))
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -21,16 +21,32 @@
(use format typed-records) ;; RADT => purpose of json format??
(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
+(declare (uses commonmod))
(declare (uses dbfile))
+;; (declare (uses dbmemmod))
+(declare (uses dbmod))
+(declare (uses tcp-transportmod))
(include "common_records.scm")
;; (declare (uses rmtmod))
+;; used by http-transport
(import dbfile) ;; rmtmod)
+(import commonmod
+;; dbmemmod
+ dbfile
+ dbmod
+ tcp-transportmod)
+
+;; http - use the old http + in /tmp db
+;; tcp - use tcp transport with inmem db
+;; nfs - use direct to disk access (read-only)
+;;
+(define rmt:transport-mode (make-parameter 'http))
;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;
;; generate entries for ~/.megatestrc with the following
@@ -61,42 +77,44 @@
(cdr hh-dat)
(begin
(debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
#f))))
+(define (make-and-init-remote areapath)
+ (case (rmt:transport-mode)
+ ((http)(make-remote))
+ ((tcp) (tt:make-remote areapath))
+ (else #f)))
;;======================================================================
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
-
- #;(common:telemetry-log (conc "rmt:"(->string cmd))
- payload: `((rid . ,rid)
- (params . ,params)))
-
+ (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.")
(if (> attemptnum 2)
(debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
-
+
(cond
((> attemptnum 2) (thread-sleep! 0.05))
((> attemptnum 10) (thread-sleep! 0.5))
((> attemptnum 20) (thread-sleep! 1)))
+
+ ;; I'm turning this off, it may make sense to move it
+ ;; into http-transport-handler
(if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))
- (begin (server:run *toppath*) (thread-sleep! 3)))
-
-
- ;;DOT digraph megatest_state_status {
- ;;DOT ranksep=0;
- ;;DOT // rankdir=LR;
- ;;DOT node [shape="box"];
- ;;DOT "rmt:send-receive" -> MUTEXLOCK;
- ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
- ;; do all the prep locked under the rmt-mutex
- (mutex-lock! *rmt-mutex*)
+ (begin
+ (debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.")
+ (case (rmt:transport-mode)
+ ((http)
+ (server:run *toppath*)
+ (thread-sleep! 3))
+ (else
+ (thread-sleep! 1) ;; for tcp the server is started by routines in tcp-transportmod. For nfs there is no server
+ ))))
;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
;; 3. do the query, if on homehost use local access
;;
@@ -103,79 +121,76 @@
(let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
(areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
(runremote (or area-dat
*runremote*))
(attemptnum (+ 1 attemptnum))
- (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
-
- ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
- ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
- ;; ensure we have a record for our connection for given area
- (if (not runremote) ;; can remove this one. should never get here.
- (begin
- (set! *runremote* (make-remote))
- (let* ((server-info (remote-server-info *runremote*)))
- (if server-info
- (begin
- (remote-server-url-set! *runremote* (server:record->url server-info))
- (remote-server-id-set! *runremote* (server:record->id server-info)))))
- (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
-
- (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)))
-
+ (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))
+ (testsuite (common:get-testsuite-name))
+ (mtexe (common:find-local-megatest)))
+
+ (case (rmt:transport-mode)
+ ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))
+ ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
+ ((nfs) (nfs:transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
+ )))
+
+(define (nfs:transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
+ (let* ((keys (common:get-fields *configdat*))
+ (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath)))
+ (api:dispatch-request dbstruct cmd run-id params)))
+
+(define (tcp-transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
+ (if (not runremote)
+ (let* ((newremote (make-and-init-remote areapath)))
+ (set! *runremote* newremote)
+ (set! runremote newremote)))
+ (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id)))
+ (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))
+
(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)
- ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
- ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
- ;; DOT SET_HOMEHOST -> MUTEXLOCK;
+ ;; do all the prep locked under the rmt-mutex
+ (mutex-lock! *rmt-mutex*)
+
+ ;; ensure we have a record for our connection for given area
+ (if (not runremote) ;; can remove this one. should never get here.
+ (begin
+ (set! *runremote* (make-and-init-remote areapath))
+ (let* ((server-info (remote-server-info *runremote*)))
+ (if server-info
+ (begin
+ (remote-server-url-set! *runremote* (server:record->url server-info))
+ (remote-server-id-set! *runremote* (server:record->id server-info)))))
+ (set! runremote *runremote*))) ;; new runremote will come from this on next iteration
+
;; ensure we have a homehost record
(if (or (not (pair? (remote-hh-dat runremote))) ;; not on homehost
(not (cdr (remote-hh-dat runremote)))) ;; not on homehost
(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
(let ((hh-data (server:choose-server areapath 'homehost)))
(remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
- ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
(cond
- #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
- (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
- (set! *runremote* #f)
- ;; BUG: close-connections should go here?
- (mutex-unlock! *rmt-mutex*)
- (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
-
- ;;DOT EXIT;
- ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
;; give up if more than 150 attempts
((> attemptnum 150)
(debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
(exit 1))
- ;;DOT CASE2 [label="local\nreadonly\nquery"];
- ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
- ;;DOT CASE2 -> "rmt:open-qry-close-locally";
;; readonly mode, read request- handle it - case 2
((and readonly-mode
(member cmd api:read-only-queries))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
(rmt:open-qry-close-locally cmd 0 params)
)
- ;;DOT CASE3 [label="write in\nread-only mode"];
- ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
- ;;DOT CASE3 -> "#f";
;; readonly mode, write request. Do nothing, return #f
(readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
;;
- ;;DOT CASE4 [label="reset\nconnection"];
- ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
- ;;DOT CASE4 -> "rmt:send-receive";
;; reset the connection if it has been unused too long
((and runremote
(remote-api-url runremote)
(> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
(+ (remote-last-access runremote)
@@ -185,61 +200,27 @@
;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
(mutex-unlock! *rmt-mutex*)
(rmt:send-receive cmd rid params attemptnum: attemptnum))
- ;;DOT CASE5 [label="local\nread"];
- ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
- ;;DOT CASE5 -> "rmt:open-qry-close-locally";
-
;; on homehost and this is a read
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
(rmt:on-homehost? runremote)
(member cmd api:read-only-queries)) ;; this is a read
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
(rmt:open-qry-close-locally cmd 0 params))
- ;;DOT CASE6 [label="init\nremote"];
- ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
- ;;DOT CASE6 -> "rmt:send-receive";
- ;; on homehost and this is a write, we already have a server, but server has died
-
- ;; reinstate this keep-alive section but inject a time condition into the (add ...
- ;;
- ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost
- ;; (not (member cmd api:read-only-queries)) ;; this is a write
- ;; (remote-server-url runremote) ;; have a server
- ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
- ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
- ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
- ;; (set! *runremote* (make-remote))
- ;; (let* ((server-info (remote-server-info *runremote*)))
- ;; (if server-info
- ;; (begin
- ;; (remote-server-url-set! *runremote* (server:record->url server-info))
- ;; (remote-server-id-set! *runremote* (server:record->id server-info)))))
- ;; (remote-force-server-set! runremote (common:force-server?))
- ;; (mutex-unlock! *rmt-mutex*)
- ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
- ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
- ;;DOT CASE7 [label="homehost\nwrite"];
- ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
- ;;DOT CASE7 -> "rmt:open-qry-close-locally";
;; on homehost and this is a write, we already have a server
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
(cdr (remote-hh-dat runremote)) ;; on homehost
(not (member cmd api:read-only-queries)) ;; this is a write
(remote-server-url runremote)) ;; have a server (needed to sync written data back)
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
(rmt:open-qry-close-locally cmd 0 params))
- ;;DOT CASE8 [label="force\nserver"];
- ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
- ;;DOT CASE8 -> "rmt:open-qry-close-locally";
;; on homehost, no server contact made and this is a write, passively start a server
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
(cdr (remote-hh-dat runremote)) ;; have homehost
(not (remote-server-url runremote)) ;; no connection yet
(not (member cmd api:read-only-queries))) ;; not a read-only query
@@ -286,11 +267,10 @@
;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
;;DOT CASE11 -> "RESULT" [label="call succeeded"];
;; not on homehost, do server query
(else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))
-;;DOT }
;; bunch of small functions factored out of send-receive to make debug easier
;;
(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
@@ -373,11 +353,11 @@
res))
(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
(let* ((qry-is-write (not (member cmd api:read-only-queries)))
(db-file-path (db:dbfile-path)) ;; 0))
- (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
+ (dbstructs-local (db:setup #t))
(read-only (not (file-write-access? db-file-path)))
(start (current-milliseconds))
(resdat (if (not (and read-only qry-is-write))
(let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
@@ -1021,23 +1001,36 @@
(rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
(define (rmt:test-get-archive-block-info archive-block-id)
(rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
-
(define (rmtmod:calc-ro-mode runremote *toppath*)
- (if (and runremote
- (remote-ro-mode-checked runremote))
- (remote-ro-mode runremote)
- (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
- (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
- (if runremote
- (begin
- (remote-ro-mode-set! runremote ro-mode)
- (remote-ro-mode-checked-set! runremote #t)
- ro-mode)
- ro-mode))))
+ (case (rmt:transport-mode)
+ ((http)
+ (if (and runremote
+ (remote-ro-mode-checked runremote))
+ (remote-ro-mode runremote)
+ (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
+ (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+ (if runremote
+ (begin
+ (remote-ro-mode-set! runremote ro-mode)
+ (remote-ro-mode-checked-set! runremote #t)
+ ro-mode)
+ ro-mode))))
+ ((tcp)
+ (if (and runremote
+ (tt-ro-mode-checked runremote))
+ (tt-ro-mode runremote)
+ (let* ((mtcfgfile (conc *toppath* "/megatest.config"))
+ (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+ (if runremote
+ (begin
+ (tt-ro-mode-set! runremote ro-mode)
+ (tt-ro-mode-checked-set! runremote #t)
+ ro-mode)
+ ro-mode))))))
(define (extras-readonly-mode rmt-mutex log-port cmd params)
(mutex-unlock! rmt-mutex)
(debug:print-info 12 log-port "rmt:send-receive, case 3")
(debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -22,10 +22,11 @@
sxml-modifications matchable)
(declare (unit runs))
(declare (uses db))
(declare (uses common))
+(declare (uses commonmod))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
@@ -37,10 +38,12 @@
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
;; (include "debugger.scm")
+
+(import commonmod)
;; use this struct to facilitate refactoring
;;
(defstruct runs:dat
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -101,24 +101,24 @@
(if *server-id* *server-id*
(let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
(set! *server-id* sig)
*server-id*)))
-;; When using zmq this would send the message back (two step process)
-;; with spiffy or rpc this simply returns the return data to be returned
-;;
-(define (server:reply return-addr query-sig success/fail result)
- (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
- ;; (send-message pubsock target send-more: #t)
- ;; (send-message pubsock
- (case (server:get-transport)
- ((rpc) (db:obj->string (vector success/fail query-sig result)))
- ((http) (db:obj->string (vector success/fail query-sig result)))
- ((fs) result)
- (else
- (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
- result)))
+;; ;; When using zmq this would send the message back (two step process)
+;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;;
+;; (define (server:reply return-addr query-sig success/fail result)
+;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;; ;; (send-message pubsock target send-more: #t)
+;; ;; (send-message pubsock
+;; (case (server:get-transport)
+;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
+;; ((http) (db:obj->string (vector success/fail query-sig result)))
+;; ((fs) result)
+;; (else
+;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;; result)))
;; Given an area path, start a server process ### NOTE ### > file 2>&1
;; if the target-host is set
;; try running on that host
;; incidental: rotate logs in logs/ dir.
@@ -469,10 +469,11 @@
;; find alive rand from youngest
;; 1. sort by age descending
;; 2. take five
;; 3. check alive, discard if not and repeat
;; first we clean up old server files
+ (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode))
(server:clean-up-old areapath)
(let* ((since-last (- (current-seconds) server-last-start))
(server-start-delay 10))
(if ( < (- (current-seconds) server-last-start) 10 )
(begin
@@ -568,11 +569,13 @@
sfiles)))
;; would like to eventually get rid of this
;;
(define (common:on-homehost?)
- (server:choose-server *toppath* 'home?))
+ (if (eq? (rmt:transport-mode) 'http)
+ (server:choose-server *toppath* 'home?)
+ #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
@@ -655,11 +658,11 @@
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
#f)
- (match-let (((mod-time hostname port start-time server-id pid)
+ (match-let (((hostname port start-time server-id pid)
servr))
(tasks:kill-server hostname pid))))
;; called in megatest.scm, host-port is string hostname:port
;;
@@ -676,11 +679,11 @@
(else
#f))))
(cond
((and (list? host-port)
(eq? (length host-port) 2))
- (let* ((myrunremote (make-remote))
+ (let* ((myrunremote (make-and-init-remote *toppath*))
(iface (car host-port))
(port (cadr host-port))
(server-dat (client:connect iface port server-id myrunremote))
(login-res (rmt:login-no-auto-client-setup myrunremote)))
(http-transport:close-connections myrunremote)
ADDED tcp-transportmod.scm
Index: tcp-transportmod.scm
==================================================================
--- /dev/null
+++ tcp-transportmod.scm
@@ -0,0 +1,685 @@
+;;======================================================================
+;; Copyright 2017, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+
+(declare (unit tcp-transportmod))
+(declare (uses debugprint))
+(declare (uses commonmod))
+(declare (uses dbfile))
+(declare (uses dbmod))
+
+(use address-info)
+
+(module tcp-transportmod
+ *
+
+ (import scheme
+ (prefix sqlite3 sqlite3:)
+ chicken
+ data-structures
+
+ address-info
+ directory-utils
+ extras
+ files
+ hostinfo
+ matchable
+ md5
+ message-digest
+ ports
+ posix
+ regex
+ regex-case
+ s11n
+ srfi-1
+ srfi-18
+ srfi-4
+ srfi-69
+ stack
+ typed-records
+ tcp-server
+ tcp
+
+ debugprint
+ commonmod
+ dbfile
+ dbmod
+ )
+
+;;======================================================================
+;; client
+;;======================================================================
+
+;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
+
+;; Used ONLY for client
+;;
+(defstruct tt-conn
+ host
+ port
+ host-port
+ dbfname
+ server-id
+ server-start
+ pid
+)
+
+;; Used for BOTH clients and servers
+(defstruct tt
+ ;; client related
+ (conns (make-hash-table)) ;; dbfname -> conn
+
+ ;; server related
+ (areapath #f)
+ (host #f)
+ (port #f)
+ (conn #f)
+ (cleanup-proc #f)
+ (handler #f) ;; receives data and responds
+ (socket #f)
+ (thread #f)
+ (host-port #f)
+ (cmd-thread #f)
+ (ro-mode #f)
+ (ro-mode-checked #f)
+ (last-access (current-seconds))
+ (servinf-file #f)
+ (last-serv-start 0)
+ )
+
+(define (tt:make-remote areapath)
+ (make-tt areapath: areapath))
+
+;; 1 ... or #f
+(define (tt:valid-run-id run-id)
+ (or (number? run-id)
+ (not run-id)))
+
+;; do all the busy work of finding and setting up conn for
+;; connecting to a server
+;;
+(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
+ (assert (tt:valid-run-id run-id) "FATAL: invalid run-id "run-id)
+ (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
+ (server-start-proc (lambda ()
+ (tt:server-process-run
+ (tt-areapath ttdat)
+ testsuite ;; (dbfile:testsuite-name)
+ (common:find-local-megatest)
+ run-id))))
+ (if conn
+ conn ;; we are already connected to the server
+ (let* ((sdat (tt:get-current-server-info ttdat dbfname)))
+ (match sdat
+ ((host port start-time server-id pid dbfname2 servinffile)
+ (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
+ (let* ((host-port (conc host":"port))
+ (conn (make-tt-conn
+ host: host
+ port: port
+ host-port: host-port
+ dbfname: dbfname
+ servinf-file: servinffile
+ server-id: server-id
+ server-start: start-time
+ pid: pid)))
+ (hash-table-set! (tt-conns ttdat) dbfname conn)
+ ;; verify we can talk to this server
+ (if (tt:ping host port server-id)
+ conn
+ (let* ((curr-secs (current-seconds)))
+ ;; rm the (last server) would go here
+ (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
+ (begin
+ (tt-last-serv-start-set! ttdat curr-secs)
+ (server-start-proc))) ;; don't try and start server unless 30 sec has gone by since last attempt
+ (thread-sleep! 1)
+ (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))
+ (else
+ (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers
+ (begin
+ (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
+ (server-start-proc)
+ (tt-last-serv-start-set! ttdat (current-seconds))))
+ (thread-sleep! 1)
+ (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
+
+(define (tt:ping host port server-id)
+ (let* ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id
+ ;;
+ ;; need two threads, one a 5 second timer
+ ;;
+ (match res
+ ((status errmsg result meta)
+ (if (equal? result server-id)
+ (begin
+ (debug:print 0 *default-log-port* "Ping to "host":"port" successful.")
+ #t) ;; then we are good
+ (begin
+ (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result)
+ #f)))
+ (else
+ ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res)
+ #f))))
+
+;; client side handler
+;;
+;;(tt:handler # get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
+;;
+(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
+ ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
+ (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
+ (if conn
+ ;; have connection, call the server
+ (let* ((res (tt:send-receive ttdat conn cmd run-id params)))
+ ;; res is (status errmsg result meta)
+ (match res
+ ((status errmsg result meta)
+ (if (list? meta)
+ (let* ((delay-wait (alist-ref 'delay-wait meta)))
+ (if (and (number? delay-wait)
+ (> delay-wait 0))
+ (begin
+ (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds")
+ (thread-sleep! delay-wait)))))
+ (case status
+ ((busy) ;; result will be how long the server wants you to delay
+ (debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in "result" seconds.")
+ (thread-sleep! (if (number? result) result 2))
+ (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
+ ((loaded)
+ (debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a 1/4 second.")
+ (thread-sleep! 0.25)
+ (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
+ (else
+ result)))
+ (else
+ (if (not res)
+ (let* ((host (tt-conn-host conn))
+ (port (tt-conn-port conn))
+ ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
+ (pid (tt-conn-pid conn))
+ (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname)))
+ (hash-table-set! (tt-conns ttdat) dbfname #f)
+ (if (file-exists? servinf)
+ (begin
+ (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", attempting to remove servinfo file.")
+ (delete-file* servinf))
+ (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf))
+ (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
+ (assert #f "FATAL: tt:handler received bad data "res)))))
+ (begin
+ (thread-sleep! 1) ;; give it a rest and try again
+ (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))))
+
+ ;; no conn yet, find and or start and find a server
+;; (let* ((server (tt:find-server ttdat dbfname)))
+;; (if server
+;; (let* ((conn (tt:client-connect-to-server server)))
+;; (hash-table-set! (tt-conns ttdat) dbfname conn)
+;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode
+;; dbfname testsuite mtexe))
+;; ;; no server, try to start a server process
+;; (begin
+;; (tt:server-process-run areapath testsuite mtexe run-id) ;; #!key (profile-mode ""))
+;; (thread-sleep! 1)
+;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath
+;; readonly-mode dbfname testsuite mtexe)))))))
+
+(define (tt:bid-for-servership run-id)
+ #f)
+
+;; gets server info and appends path to server file
+;; sorts by age, oldest first
+;;
+;; returns list of (host port startseconds server-id servinfofile)
+;;
+(define (tt:get-server-info-sorted ttdat dbfname)
+ (let* ((areapath (tt-areapath ttdat))
+ (sfiles (tt:find-server areapath dbfname))
+ (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
+ (sorted (sort sdats (lambda (a b)
+ (< (list-ref a 2)(list-ref b 2))))))
+ sorted))
+
+(define (tt:get-current-server-info ttdat dbfname)
+ (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
+ ;;
+ ;; TODO - replace most of below with tt;get-server-info-sorted
+ ;;
+ (let* ((areapath (tt-areapath ttdat))
+ (sfiles (tt:find-server areapath dbfname))
+ (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
+ (sorted (sort sdats (lambda (a b)
+ (< (list-ref a 2)(list-ref b 2))))))
+ (if (null? sorted)
+ #f ;; we'll want to wait until extra servers have exited
+ (car sorted))))
+
+(define (tt:send-receive ttdat conn cmd run-id params)
+ (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
+ (host (tt-conn-host conn))
+ (port (tt-conn-port conn))
+ (dat (list cmd run-id params #f))) ;; no meta data yet
+ (tt:send-receive-direct host port dat)))
+
+(define (tt:send-receive-direct host port dat)
+ (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
+ (handle-exceptions
+ exn
+ #f ;; Add condition-case or better handling here
+ (let-values (((inp oup)(tcp-connect host port)))
+ (let ((res (if (and inp oup)
+ (begin
+ (serialize dat oup)
+ (close-output-port oup)
+ (deserialize inp))
+ )))
+ (close-input-port inp)
+ res))))
+
+
+
+;;======================================================================
+;; server
+;;======================================================================
+
+(define (tt:sync-dbs ttdat)
+ #f)
+
+;; start the listener and start responding to requests
+;;
+;; NOTE: organise by dbfname, not run-id so we don't need
+;; to pull in more modules
+;;
+;; This is the routine called in megatest.scm to start a server
+;;
+;; Server viability is checked in keep-running. Blindly start and run here.
+;;
+(define (tt:start-server areapath run-id dbfname-in handler keys)
+ (assert areapath "FATAL: areapath not provided for tt:start-server")
+ ;; is there already a server for this dbfile? Then exit.
+ (let* ((ttdat (make-tt areapath: areapath))
+ (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))))
+ ;; (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
+ ;; (if (null? servers)
+ (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys)))
+ (tt-handler-set! ttdat (handler dbstruct))
+ (let* ((tcp-thread (make-thread
+ (lambda ()
+ (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data
+ "tcp-server-thread"))
+ (run-thread (make-thread
+ (lambda ()
+ (tt:keep-running ttdat dbfname dbstruct)))))
+ (thread-start! tcp-thread)
+ (thread-start! run-thread)
+ (thread-join! run-thread) ;; run thread will exit on timeout or other conditions
+ (exit)))
+ ;;(begin
+ ;; (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
+ ;; (exit)))))
+ ))
+
+(define (tt:keep-running ttdat dbfname dbstruct)
+ ;; verfiy conn for ready
+ ;; listener socket has been started by this stage
+ ;; wait for a port before creating the registration file
+ ;;
+ (let* ((cleanup (lambda ()
+ (if (tt-cleanup-proc ttdat)
+ ((tt-cleanup-proc ttdat))))))
+ (let loop ((count 0))
+ (if (> count 240)
+ (begin
+ (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
+ (exit 1))
+ (if (not (tt-port ttdat)) ;; no connection yet
+ (begin
+ (thread-sleep! 0.25)
+ (loop (+ count 1))))))
+
+ ;; load or reload the data into inmem db before
+ ;; ((dbr:dbstruct-sync-proc dbstruct) (dbr:dbstruct-last-update dbstruct))
+ ;; (dbr:dbstruct-last-update-set! dbstruct (- (current-seconds) 1))
+ (tt:create-server-registration-file ttdat dbfname)
+ ;; now start watching the last-access, if it hasn't been touched
+ ;; in over ten seconds we exit
+ (thread-sleep! 0.05) ;; any real need for delay here?
+ (let loop ()
+ (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
+ (ok (cond
+ ((null? servers) #f) ;; not ok
+ ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
+ (tt-servinf-file ttdat))
+ (debug:print-info 0 *default-log-port* "Keep running, I'm the top server.")
+ #t)
+ (else
+ (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
+ (let* ((leadsrv (car servers)))
+ (match leadsrv
+ ((host port startseconds server-id pid dbfname servinfofile)
+ (if (tt:ping host port server-id)
+ #f ;; not the server, but all good, want to exit
+ (if (and (file-exists? servinfofile)
+ (> (- (current-seconds)(file-modification-time servinfofile)) 5))
+ (begin
+ ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it
+ (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
+ (delete-file* servinfofile)
+ #t) ;; not the server but the server is not reachable
+ #t)))
+ (else ;; should never get here
+ (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
+ (assert #f "Bad server record "leadsrv))))))))
+ (if ok
+ ;; (if (> *api-process-request-count* 0) ;; have requests in flight
+ ;; (tt-last-access-set! ttdat (current-seconds)))
+ (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
+ (begin
+ (cleanup)
+ (exit)))
+
+ (let* ((last-update (dbr:dbstruct-last-update dbstruct))
+ (curr-secs (current-seconds)))
+ (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
+ (begin
+ ((dbr:dbstruct-sync-proc dbstruct) last-update)
+ (dbr:dbstruct-last-update-set! dbstruct curr-secs))))
+
+ (if (< (- (current-seconds) (tt-last-access ttdat)) 60)
+ (begin
+ (thread-sleep! 5)
+ (loop)))))
+ (cleanup)
+ (debug:print 0 *default-log-port* "INFO: Server timed out, exiting.")))
+
+
+;; ;; given an already set up uconn start the cmd-loop
+;; ;;
+;; (define (tt:cmd-loop ttdat)
+;; (let* ((serv-listener (-socket uconn))
+;; (listener (lambda ()
+;; (let loop ((state 'start))
+;; (let-values (((inp oup)(tcp-accept serv-listener)))
+;; ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; (let* ((rdat (deserialize inp)) ;; '(my-host-port qrykey cmd params)
+;; (resp (ulex-handler uconn rdat)))
+;; (serialize resp oup)
+;; (close-input-port inp)
+;; (close-output-port oup)
+;; ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; )
+;; (loop state))))))
+;; ;; start N of them
+;; (let loop ((thnum 0)
+;; (threads '()))
+;; (if (< thnum 100)
+;; (let* ((th (make-thread listener (conc "listener" thnum))))
+;; (thread-start! th)
+;; (loop (+ thnum 1)
+;; (cons th threads)))
+;; (map thread-join! threads)))))
+;;
+;;
+;;
+;; (define (wait-and-close uconn)
+;; (thread-join! (udat-cmd-thread uconn))
+;; (tcp-close (udat-socket uconn)))
+;;
+;;
+
+(define (tt:shutdown-server ttdat)
+ (let* ((cleanproc (tt-cleanup-proc ttdat)))
+ (if cleanproc (cleanproc))
+ (tcp-close (tt-socket ttdat)) ;; close up ports here
+ ))
+
+;; (define (wait-and-close uconn)
+;; (thread-join! (tt-cmd-thread uconn))
+;; (tcp-close (tt-socket uconn)))
+
+;; return servid
+;; side-effects:
+;; ttdat-cleanup-proc is populated with function to remove the serverinfo file
+(define (tt:create-server-registration-file ttdat dbfname)
+ (let* ((areapath (tt-areapath ttdat))
+ (servdir (tt:get-servinfo-dir areapath))
+ (host (tt-host ttdat))
+ (port (tt-port ttdat))
+ (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
+ (serv-id (tt:mk-signature areapath))
+ (clean-proc (lambda ()
+ (delete-file* servinf))))
+ (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
+ (tt-cleanup-proc-set! ttdat clean-proc)
+ (tt-servinf-file-set! ttdat servinf)
+ (with-output-to-file servinf
+ (lambda ()
+ (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
+ serv-id))
+
+;; find valid server
+;; get servers listed, last part of name must match :
+;; if more than one, wait one second and look again
+;; future: ping oldest, if alive remove other : files
+;;
+(define (tt:find-server areapath dbfname)
+ (let* ((servdir (tt:get-servinfo-dir areapath))
+ (sfiles (glob (conc servdir"/*:"dbfname))))
+ sfiles))
+
+;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
+;; example of what it's looking for in the log file:
+;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
+;;
+(define (tt:server-get-info logf)
+ (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
+ (dbprep-rx (regexp "^SERVER: dbprep"))
+ (dbprep-found 0)
+ (bad-dat (list #f #f #f #f #f #f logf)))
+ (let ((fdat (handle-exceptions
+ exn
+ (begin
+ ;; WARNING: this is potentially dangerous to blanket ignore the errors
+ (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn=" exn)
+ '()) ;; no idea what went wrong, call it a bad server, return empty list
+ (with-input-from-file logf read-lines))))
+ (if (null? fdat) ;; bad data, return bad-dat
+ bad-dat
+ (let loop ((inl (car fdat))
+ (tail (cdr fdat))
+ (lnum 0))
+ (let ((mlst (string-match server-rx inl))
+ (dbprep (string-match dbprep-rx inl)))
+ (if dbprep (set! dbprep-found 1))
+ (if (not mlst)
+ (if (> lnum 500) ;; give up if more than 500 lines of server log read
+ bad-dat
+ (if (null? tail)
+ bad-dat
+ (loop (car tail)(cdr tail)(+ lnum 1))))
+ (match mlst ;; have a not null list
+ ((_ host port start server-id pid dbfname)
+ (list host
+ (string->number port)
+ (string->number start)
+ server-id
+ (string->number pid)
+ dbfname
+ logf))
+ (else
+ (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
+ bad-dat)))))))))
+
+;; Given an area path, start a server process ### NOTE ### > file 2>&1
+;; if the target-host is set
+;; try running on that host
+;; incidental: rotate logs in logs/ dir.
+;;
+(define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area
+ (assert areapath "FATAL: tt:server-process-run called without areapath defined.")
+ (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.")
+ (assert mtexe "FATAL: tt:server-process-run called without mtexe defined.")
+ (let* ((load (get-normalized-cpu-load))
+ (nrun (number-of-processes-running "mtest.*server")))
+ (cond
+ ((> load 2.0)
+ (debug:print 0 *default-log-port* "Normalized load "load" is over the limit of 2.0. Not starting a server.")
+ (thread-sleep! 1))
+ ((> nrun 40)
+ (debug:print 0 *default-log-port* nrun" servers running on this host, not starting another.")
+ (thread-sleep! 1))
+ (else
+ (if (not (file-exists? (conc areapath"/logs")))
+ (create-directory (conc areapath"/logs") #t))
+ (let* ((logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+ (cmdln (conc
+ mtexe
+ " -server - ";; (or target-host "-")
+ " -m testsuite:" testsuite
+ ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this
+ " -db " (dbmod:run-id->dbfname run-id)
+ " " profile-mode
+ ))) ;; (conc " >> " logfile " 2>&1 &")))))
+ ;; we want the remote server to start in *toppath* so push there
+ ;; (push-directory areapath) ;; use cd in the command line instead
+ (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)"...")
+ ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
+ (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
+ (setenv "NBFAKE_LOG" logfile)
+ (system (conc "cd "areapath" ; nbfake " cmdln))
+ (unsetenv "NBFAKE_QUIET")
+ (unsetenv "NBFAKE_LOG")
+ ;;(pop-directory)
+ )))))
+
+;;======================================================================
+;; tcp connection stuff
+;;======================================================================
+
+;; find a port and start tcp-server. This only starts the tcp portion of
+;; the server, look at (tt:start-server ...) above for the entry point
+;; for the entire server system
+;;
+(define (tt:start-tcp-server ttdat)
+ (setup-listener ttdat)
+ (let* ((socket (tt-socket ttdat))
+ (handler (tt-handler ttdat)))
+ ((make-tcp-server socket handler)
+ #f ;; yes, send error messages to std-err
+ )))
+
+;; create a tcp listener and return a populated udat struct with
+;; my port, address, hostname, pid etc.
+;; return #f if fail to find a port to allocate.
+;;
+;; if udata-in is #f create the record
+;; if there is already a serv-listener return the udata
+;;
+(define (setup-listener uconn #!optional (port 4242))
+ (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn)
+ (handle-exceptions
+ exn
+ (if (< port 65535)
+ (setup-listener uconn (+ port 1))
+ #f)
+ (connect-listener uconn port)))
+
+(define (connect-listener uconn port)
+ ;; (tcp-listener-socket LISTENER)(socket-name so)
+ ;; sockaddr-address, sockaddr-port, sockaddr->string
+ (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
+ (addr (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
+ (tt-port-set! uconn port)
+ (tt-host-set! uconn addr)
+ (tt-host-port-set! uconn (conc addr":"port))
+ (tt-socket-set! uconn tlsn)
+ uconn))
+
+;;======================================================================
+;; utils
+;;======================================================================
+
+;; Generate a unique signature for this server
+(define (tt:mk-signature areapath)
+ (message-digest-string (md5-primitive)
+ (with-output-to-string
+ (lambda ()
+ (write (list areapath
+ (current-process-id)
+ (argv)))))))
+
+
+(define (tt:get-best-guess-address hostname)
+ (let ((res #f))
+ (for-each
+ (lambda (adr)
+ (if (not (eq? (u8vector-ref adr 0) 127))
+ (set! res adr)))
+ ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+ (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+ (string-intersperse
+ (map number->string
+ (u8vector->list
+ (if res res (hostname->ip hostname)))) ".")))
+
+(define (tt:get-servinfo-dir areapath)
+ (let* ((spath (conc areapath"/.servinfo")))
+ (if (not (file-exists? spath))
+ (create-directory spath #t))
+ spath))
+
+;;======================================================================
+;; network utilities
+;;======================================================================
+
+;; NOTE: Look at address-info egg as alternative to some of this
+
+(define (rate-ip ipaddr)
+ (regex-case ipaddr
+ ( "^127\\..*" _ 0 )
+ ( "^(10\\.0|192\\.168)\\..*" _ 1 )
+ ( else 2 ) ))
+
+;; Change this to bias for addresses with a reasonable broadcast value?
+;;
+(define (ip-pref-less? a b)
+ (> (rate-ip a) (rate-ip b)))
+
+(define (get-my-best-address)
+ (let ((all-my-addresses (get-all-ips)))
+ (cond
+ ((null? all-my-addresses)
+ (get-host-name)) ;; no interfaces?
+ ((eq? (length all-my-addresses) 1)
+ (car all-my-addresses)) ;; only one to choose from, just go with it
+ (else
+ (car (sort all-my-addresses ip-pref-less?))))))
+
+(define (get-all-ips-sorted)
+ (sort (get-all-ips) ip-pref-less?))
+
+(define (get-all-ips)
+ (map address-info-host
+ (filter (lambda (x)
+ (equal? (address-info-type x) "tcp"))
+ (address-infos (get-host-name)))))
+
+)
ADDED transport-mode.scm.template
Index: transport-mode.scm.template
==================================================================
--- /dev/null
+++ transport-mode.scm.template
@@ -0,0 +1,3 @@
+;; 'http or 'tcp
+(rmt:transport-mode 'tcp)
+;; (rmt:transport-mode 'http)
Index: utils/nbfake
==================================================================
--- utils/nbfake
+++ utils/nbfake
@@ -39,10 +39,11 @@
nbfake behavior can be changed by setting the following env vars:
NBFAKE_HOST SSH to \$NBFAKE_HOST and run command
NBFAKE_LOG Logfile for nbfake output
NB_WASH_GROUPS comma-separated list of groups to wash into
NB_WASH_ENABLED must be set in order to enable wash groups
+ NBFAKE_QUIET set to suppress informational output
__EOF
exit
fi
@@ -87,19 +88,21 @@
#==============================================================================
# Run and log
#==============================================================================
+if [[ -z "$NBFAKE_QUIET" ]];then
cat <<__EOF >&2
#======================================================================
# NBFAKE logging command to: $MY_NBFAKE_LOG
# $WASHCMD $*
#======================================================================
__EOF
+fi
if [[ -z "$MY_NBFAKE_HOST" ]]; then
# Run locally
sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &"
else
# run remotely
ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\""
fi