Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -14,11 +14,11 @@ ;;====================================================================== (require-extension (srfi 18) extras tcp rpc) (import (prefix rpc rpc:)) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n zmq) (import (prefix sqlite3 sqlite3:)) (declare (unit db)) (declare (uses common)) (declare (uses keys)) @@ -53,10 +53,11 @@ (begin (debug:print-info 11 "db:set-sync, setting pragma synchronous to " val) (sqlite3:execute db (conc "PRAGMA synchronous = '" val "';")))))) (define (open-db) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) + (if (not *toppath*)(setup-for-run)) (let* ((dbpath (conc *toppath* "/megatest.db")) ;; fname) (dbexists (file-exists? dbpath)) (db (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) @@ -442,18 +443,22 @@ 2)) (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit (begin (debug:print-info 4 "launch throttle factor=" *global-delta*) (set! *last-global-delta-printed* *global-delta*))) - (debug:print-info 11 "db:get-var END " var) + (debug:print-info 11 "db:get-var END " var " val=" res) res)) (define (db:set-var db var val) (debug:print-info 11 "db:set-var START " var " " val) (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val) - (debug:print-info 11 "db:set-var END " var " " val) -) + (debug:print-info 11 "db:set-var END " var " " val)) + +(define (db:del-var db var) + (debug:print-info 11 "db:del-var START " var) + (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var) + (debug:print-info 11 "db:del-var END " var)) ;; use a global for some primitive caching, it is just silly to re-read the db ;; over and over again for the keys since they never change (define (db:get-keys db) @@ -1073,14 +1078,11 @@ t.comment t.event_time t.fail_count t.pass_count t.archived - - - - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id WHERE " + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id WHERE " keystr " AND r.runname LIKE '" runname "' AND item_path LIKE '" itempatt "' AND testname LIKE '" testpatt "' AND t.state LIKE '" statepatt "' AND t.status LIKE '" statuspatt "'ORDER BY t.event_time ASC;"))) (debug:print 3 "qrystr: " qrystr) (sqlite3:for-each-row @@ -1092,76 +1094,93 @@ ;;====================================================================== ;; QUEUE UP META, TEST STATUS AND STEPS ;;====================================================================== +;; db:updater is run in a thread to write out the cached data periodically (define (db:updater) (debug:print-info 4 "Starting cache processing") - (let loop ((start-time (current-time))) + (let loop () (thread-sleep! 10) ;; move save time around to minimize regular collisions? (db:write-cached-data) - (loop start-time))) - -(define (cdb:test-set-status-state test-id status state msg) - (debug:print-info 4 "cdb:test-set-status-state test-id=" test-id ", status=" status ", state=" state ", msg=" msg) - (mutex-lock! *incoming-mutex*) - (set! *last-db-access* (current-seconds)) - (if msg - (set! *incoming-data* (cons (vector 'state-status-msg - (current-milliseconds) - (list state status msg test-id)) - *incoming-data*)) - (set! *incoming-data* (cons (vector 'state-status - (current-milliseconds) - (list state status test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) - *incoming-data*))) - (mutex-unlock! *incoming-mutex*) - (if *cache-on* - (debug:print-info 6 "*cache-on* is " *cache-on* ", skipping cache write") - (db:write-cached-data))) - -(define (cdb:test-rollup-test_data-pass-fail test-id) - (debug:print-info 4 "Adding " test-id " for test_data rollup to the queue") - (mutex-lock! *incoming-mutex*) - (set! *last-db-access* (current-seconds)) - (set! *incoming-data* (cons (vector 'test_data-pf-rollup - (current-milliseconds) - (list test-id test-id test-id test-id)) - *incoming-data*)) - (mutex-unlock! *incoming-mutex*) - (if *cache-on* - (debug:print-info 6 "*cache-on* is " *cache-on* ", skipping cache write") - (db:write-cached-data))) - -(define (cdb:pass-fail-counts test-id fail-count pass-count) - (debug:print-info 4 "Adding " test-id " for setting pass/fail counts to the queue") - (mutex-lock! *incoming-mutex*) - (set! *last-db-access* (current-seconds)) - (set! *incoming-data* (cons (vector 'pass-fail-counts - (current-milliseconds) - (list fail-count pass-count test-id)) - *incoming-data*)) - (mutex-unlock! *incoming-mutex*) - (if *cache-on* - (debug:print-info 6 "*cache-on* is " *cache-on* ", skipping cache write") - (db:write-cached-data))) - -(define (cdb:tests-register-test db run-id test-name item-path #!key (force-write #f)) + (loop))) + +;; cdb:cached-access is called by the server loop to dispatch commands or queue up +;; db accesses +;; +;; params := qry-name cached? val1 val2 val3 ... +(define (cdb:cached-access params) + (debug:print-info 12 "cdb:cached-access params=" params) + (if (< (length params) 2) + "ERROR" + (let ((qry-name (car params)) + (cached? (cadr params)) + (remparam (list-tail params 2))) + (debug:print-info 12 "cdb:cached-access qry-name=" qry-name " params=" params) + ;; Any special calls are dispatched here. + ;; Remainder are put in the db queue + (case qry-name + ((login) ;; login checks that the megatest path matches + (if (null? remparam) + #f ;; no path - fail! + (let ((calling-path (car remparam))) + (if (equal? calling-path *toppath*) + #t ;; path matches - pass! Should vet the caller at this time ... + #f)))) ;; else fail to login + ((flush) + (db:write-cached-data) + #t) + (else + (mutex-lock! *incoming-mutex*) + (set! *last-db-access* (current-seconds)) + (set! *incoming-data* (cons + (vector qry-name + (current-milliseconds) + params) + *incoming-data*)) + (mutex-unlock! *incoming-mutex*) + ;; NOTE: if cached? is #f then this call must be run immediately + ;; but first all calls in the queue are run first in the order + ;; of their time stamp + (if (and cached? *cache-on*) + (begin + (debug:print-info 12 "*cache-on* is " *cache-on* ", skipping cache write") + "CACHED") + (begin + (db:write-cached-data) + "WRITTEN"))))))) + +(define (db:obj->string obj)(with-output-to-string (lambda ()(serialize obj)))) +(define (db:string->obj msg)(with-input-from-string msg (lambda ()(deserialize)))) + +(define (cdb:client-call zmq-socket . params) + (debug:print-info 11 "cdb:client-call zmq-socket=" zmq-socket " params=" params) + (let ((zdat (db:obj->string params)) ;; (with-output-to-string (lambda ()(serialize params)))) + (res #f)) + (print "cdb:client-call before send message") + (send-message zmq-socket zdat) + (print "cdb:client-call after send message") + (set! res (db:string->obj (receive-message zmq-socket zdat))) + (debug:print-info 11 "zmq-socket " (car params) " res=" res) + res)) + +(define (cdb:test-set-status-state zmqsocket test-id status state msg) + (if msg + (cdb:client-call zmqsocket 'state-status-msg state status msg test-id) + (cdb:client-call zmqsocket 'state-status state status test-id))) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) + +(define (cdb:test-rollup-test_data-pass-fail zmqsocket test-id) + (cdb:client-call zmqsocket 'test_data-pf-rollup #t test-id test-id test-id)) + +(define (cdb:pass-fail-counts zmqsocket test-id fail-count pass-count) + (cdb:client-call zmqsocket 'pass-fail-counts fail-count pass-count test-id)) + +(define (cdb:tests-register-test zmqsocket db run-id test-name item-path) (let ((item-paths (if (equal? item-path "") (list item-path) (list item-path "")))) - (debug:print-info 4 "Adding " run-id ", " test-name "/" item-path " for setting pass/fail counts to the queue") - (mutex-lock! *incoming-mutex*) - (set! *last-db-access* (current-seconds)) - (set! *incoming-data* (cons (vector 'register-test - (current-milliseconds) - (list run-id test-name item-path)) ;; fail-count pass-count test-id)) - *incoming-data*)) - (mutex-unlock! *incoming-mutex*) - (if (and (not force-write) *cache-on*) - (debug:print-info 6 "*cache-on* is " *cache-on* ", skipping cache write") - (db:write-cached-data)))) + (cdb:client-call zmqsocket 'register-test run-id test-name item-path))) ;; The queue is a list of vectors where the zeroth slot indicates the type of query to ;; apply and the second slot is the time of the query and the third entry is a list of ;; values to be applied ;; @@ -1227,11 +1246,23 @@ #f)) (define cdb:flush-queue db:write-cached-data) (define (db:roll-up-pass-fail-counts db run-id test-name item-path status) - (rdb:flush-queue) + + + + + + ;; NEEDED!? + ;; (rdb:flush-queue) + + + + + + (if (and (not (equal? item-path "")) (or (equal? status "PASS") (equal? status "WARN") (equal? status "FAIL") (equal? status "WAIVED") @@ -1699,55 +1730,48 @@ ;;====================================================================== ;; REMOTE DB ACCESS VIA RPC ;;====================================================================== -(define (rdb:open-run-close procname . remargs) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - (apply (rpc:procedure 'rdb:open-run-close host port) procname remargs)) - (apply open-run-close (eval procname) remargs))) - -(define (rdb:test-set-status-state test-id status state msg) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - (handle-exceptions - exn - (begin - (debug:print 0 "EXCEPTION: rpc call failed?") - (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain) - (cdb:test-set-status-state test-id status state msg)) - ((rpc:procedure 'cdb:test-set-status-state host port) test-id status state msg))) - (cdb:test-set-status-state test-id status state msg))) - -(define (rdb:test-rollup-test_data-pass-fail test-id) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - ((rpc:procedure 'cdb:test-rollup-test_data-pass-fail host port) test-id)) - (cdb:test-rollup-test_data-pass-fail test-id))) - -(define (rdb:pass-fail-counts test-id fail-count pass-count) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - ((rpc:procedure 'cdb:pass-fail-counts host port) test-id fail-count pass-count)) - (cdb:pass-fail-counts test-id fail-count pass-count))) - -;; currently forces a flush of the queue -(define (rdb:tests-register-test db run-id test-name item-path) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - ((rpc:procedure 'cdb:tests-register-test host port) db run-id test-name item-path force-write: #t)) - (cdb:tests-register-test db run-id test-name item-path force-write: #t))) - -(define (rdb:flush-queue) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - ((rpc:procedure 'cdb:flush-queue host port))) - (cdb:flush-queue))) - +;; (define (rdb:test-set-status-state test-id status state msg) +;; (if *runremote* +;; (let ((host (vector-ref *runremote* 0)) +;; (port (vector-ref *runremote* 1))) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 "EXCEPTION: rpc call failed?") +;; (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) +;; (print-call-chain) +;; (cdb:test-set-status-state test-id status state msg)) +;; ((rpc:procedure 'cdb:test-set-status-state host port) test-id status state msg))) +;; (cdb:test-set-status-state test-id status state msg))) +;; +;; (define (rdb:test-rollup-test_data-pass-fail test-id) +;; (if *runremote* +;; (let ((host (vector-ref *runremote* 0)) +;; (port (vector-ref *runremote* 1))) +;; ((rpc:procedure 'cdb:test-rollup-test_data-pass-fail host port) test-id)) +;; (cdb:test-rollup-test_data-pass-fail test-id))) +;; +;; (define (rdb:pass-fail-counts test-id fail-count pass-count) +;; (if *runremote* +;; (let ((host (vector-ref *runremote* 0)) +;; (port (vector-ref *runremote* 1))) +;; ((rpc:procedure 'cdb:pass-fail-counts host port) test-id fail-count pass-count)) +;; (cdb:pass-fail-counts test-id fail-count pass-count))) +;; +;; ;; currently forces a flush of the queue +;; (define (rdb:tests-register-test db run-id test-name item-path) +;; (if *runremote* +;; (let ((host (vector-ref *runremote* 0)) +;; (port (vector-ref *runremote* 1))) +;; ((rpc:procedure 'cdb:tests-register-test host port) db run-id test-name item-path force-write: #t)) +;; (cdb:tests-register-test db run-id test-name item-path force-write: #t))) +;; +;; (define (rdb:flush-queue) +;; (if *runremote* +;; (let ((host (vector-ref *runremote* 0)) +;; (port (vector-ref *runremote* 1))) +;; ((rpc:procedure 'cdb:flush-queue host port))) +;; (cdb:flush-queue))) +;; Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -8,11 +8,11 @@ ;; PURPOSE. ;; (include "common.scm") ;; (include "megatest-version.scm") -(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos) ;; (srfi 18) extras) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos zmq) ;; (srfi 18) extras) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) (declare (uses common)) (declare (uses megatest-version)) @@ -252,10 +252,35 @@ (if (args:get-arg "-env2file") (begin (save-environment-as-files (args:get-arg "-env2file")) (set! *didsomething* #t))) + +;;====================================================================== +;; Start the server - can be done in conjunction with -runall or -runtests (one day...) +;; we start the server if not running else start the client thread +;;====================================================================== +(if (args:get-arg "-server") + (let* ((toppath (setup-for-run)) + (db (if toppath (open-db) #f))) + (debug:print-info 0 "Starting the standalone server") + (if db + (let* ((th2 (make-thread (lambda () + (server:run (args:get-arg "-server"))))) + (th3 (make-thread (lambda () + (server:keep-running db))))) + (thread-start! th3) + (thread-start! th2) + (thread-join! th3) + (set! *didsomething* #t)) + (debug:print 0 "ERROR: Failed to setup for megatest"))) + ;; not starting server? then start the client + (if (server:client-setup) + (debug:print-info 0 "connected as client") + (begin + (debug:print 0 "ERROR: Failed to connect as client") + (exit)))) ;;====================================================================== ;; Remove old run(s) ;;====================================================================== @@ -360,27 +385,10 @@ tests)))) runs) (set! *didsomething* #t) ))) -;;====================================================================== -;; Start the server - can be done in conjunction with -runall or -runtests (one day...) -;;====================================================================== -(if (args:get-arg "-server") - (let* ((toppath (setup-for-run)) - (db (if toppath (open-db) #f))) - (debug:print-info 0 "Starting the standalone server") - (if db - (let* ((host:port (db:get-var db "SERVER")) ;; this doen't support multiple servers BUG!!!! - (th2 (server:start db (args:get-arg "-server"))) - (th3 (make-thread (lambda () - (server:keep-running db host:port))))) - (thread-start! th3) - (thread-join! th3) - (set! *didsomething* #t)) - (debug:print 0 "ERROR: Failed to setup for megatest")))) - ;;====================================================================== ;; full run ;;====================================================================== ;; get lock in db for full run for this directory @@ -397,21 +405,34 @@ ;; if still ok to run tasks ;; process deferred tasks per above steps ;; run all tests are are Not COMPLETED and PASS or CHECK (if (args:get-arg "-runall") - (general-run-call - "-runall" - "run all tests" - (lambda (target runname keys keynames keyvallst) + (let ((server-thread #f)) + (if (args:get-arg "-server") + (let ((toppath (setup-for-run)) + (db (open-db))) + (if db + (let* ((host:port (db:get-var db "SERVER")) ;; this doen't support multiple servers BUG!!!! + (th2 (server:start db (args:get-arg "-server"))) + (th3 (make-thread (lambda () + (server:keep-running db host:port))))) + (thread-start! th3) + (set! server-thread th3))))) + (general-run-call + "-runall" + "run all tests" + (lambda (target runname keys keynames keyvallst) (runs:run-tests target runname (if (args:get-arg "-testpatt") (args:get-arg "-testpatt") "%/%") user - args:arg-hash)))) ;; ) + args:arg-hash))) + (if server-thread + (thread-join! server-thread)))) ;;====================================================================== ;; run one test ;;====================================================================== Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -6,14 +6,14 @@ ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. -(require-extension (srfi 18) extras tcp rpc) +(require-extension (srfi 18) extras tcp rpc s11n) (import (prefix rpc rpc:)) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo zmq) (import (prefix sqlite3 sqlite3:)) (declare (unit server)) (declare (uses common)) @@ -21,193 +21,135 @@ (declare (uses tests)) (include "common_records.scm") (include "db_records.scm") -;; procstr is the name of the procedure to be called as a string -(define (server:autoremote procstr params) - (handle-exceptions - exn - (begin - (debug:print 1 "Remote failed for " proc " " params) - (apply (eval (string->symbol procstr)) params)) - ;; (if *runremote* - ;; (apply (eval (string->symbol (conc "remote:" procstr))) params) - (apply (eval (string->symbol procstr)) params))) - -(define (server:start db hostn) +(define (server:run hostn) (debug:print 0 "Attempting to start the server ...") - (let ((host:port (db:get-var db "SERVER"))) ;; do whe already have a server running? + (let ((host:port (open-run-close db:get-var #f "SERVER"))) ;; do whe already have a server running? (if host:port - (set! *runremote* (let* ((lst (string-split host:port ":")) - (port (if (> (length lst) 1) - (string->number (cadr lst)) - #f))) - (if port (vector (car lst) port) #f))) - (let* ((rpc:listener (server:find-free-port-and-open (rpc:default-server-port))) - (th1 (make-thread - (cute (rpc:make-server rpc:listener) "rpc:server") - 'rpc:server)) - ;; (th2 (make-thread (lambda ()(db:updater)))) + (begin + (debug:print 0 "ERROR: server already running.") + (if (server:client-setup) + (begin + (debug:print-info 0 "Server is alive, exiting") + (exit)) + (begin + (debug:print-info 0 "Server is dead, removing flag and trying again") + (open-run-close db:del-var #f "SERVER") + (server:run hostn)))) + (let* ((zmq-socket #f) (hostname (if (string=? "-" hostn) (get-host-name) hostn)) - (ipaddrstr (if (string=? "-" hostn) - (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - #f)) - (host:port (conc (if ipaddrstr ipaddrstr hostname) ":" (rpc:default-server-port)))) - (debug:print 0 "Server started on " host:port) - (db:set-var db "SERVER" host:port) + (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") + #f))) + (if ipstr ipstr hostname)))) + (set! zmq-socket (server:find-free-port-and-open ipaddrstr zmq-socket 5555)) (set! *cache-on* #t) - ;; can use this to run most anything at the remote - (rpc:publish-procedure! - 'remote:run - (lambda (procstr . params) - (server:autoremote procstr params))) - - (rpc:publish-procedure! - 'server:login - (lambda (toppath) - (set! *last-db-access* (current-seconds)) - (if (equal? *toppath* toppath) - (begin - (debug:print-info 2 "login successful") - #t) - #f))) - - ;;====================================================================== - ;; db specials here - ;;====================================================================== - ;; remote call to open-run-close - (rpc:publish-procedure! - 'rdb:open-run-close - (lambda (procname . remargs) - (debug:print-info 12 "Remote call of rdb:open-run-close " procname " " remargs) - (set! *last-db-access* (current-seconds)) - (apply open-run-close (eval procname) remargs))) - - (rpc:publish-procedure! - 'cdb:test-set-status-state - (lambda (test-id status state msg) - (debug:print-info 12 "Remote call of cdb:test-set-status-state test-id=" test-id ", status=" status ", state=" state ", msg=" msg) - (cdb:test-set-status-state test-id status state msg))) - - (rpc:publish-procedure! - 'cdb:test-rollup-test_data-pass-fail - (lambda (test-id) - (debug:print-info 12 "Remote call of cdb:test-rollup-test_data-pass-fail " test-id) - (cdb:test-rollup-test_data-pass-fail test-id))) - - (rpc:publish-procedure! - 'cdb:pass-fail-counts - (lambda (test-id fail-count pass-count) - (debug:print-info 12 "Remote call of cdb:pass-fail-counts " test-id " passes: " pass-count " fails: " fail-count) - (cdb:pass-fail-counts test-id fail-count pass-count))) - - (rpc:publish-procedure! - 'cdb:tests-register-test - (lambda (db run-id test-name item-path) - (debug:print-info 12 "Remote call of cdb:tests-register-test " run-id " testname: " test-name " item-path: " item-path) - (cdb:tests-register-test db run-id test-name item-path))) - - (rpc:publish-procedure! - 'cdb:flush-queue - (lambda () - (debug:print-info 12 "Remote call of cdb:flush-queue") - (cdb:flush-queue))) - - ;;====================================================================== - ;; end of publish-procedure section - ;;====================================================================== - - (set! *rpc:listener* rpc:listener) + ;; what to do when we quit + ;; (on-exit (lambda () - (open-run-close - (lambda (db . params) - (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER' and val=?;" host:port)) - #f ;; for db - #f) ;; for a param - (let loop ((n 0)) + (open-run-close db:del-var #f "SERVER") + (let loop () (let ((queue-len 0)) (thread-sleep! (random 5)) (mutex-lock! *incoming-mutex*) (set! queue-len (length *incoming-data*)) (mutex-unlock! *incoming-mutex*) (if (> queue-len 0) (begin (debug:print-info 0 "Queue not flushed, waiting ...") - (loop (+ n 1))))) - ))) - (db:updater) - (thread-start! th1) - ;; (debug:print 0 "Server started on port " (rpc:default-server-port) "...") - ;; (thread-start! th2) - ;; (thread-join! th2) - ;; return th2 for the calling process to do a join with - th1 - )))) ;; rpc:server))) - -(define (server:keep-running db host:port) + (loop))))))) + + ;; The heavy lifting + ;; + (let loop () + (let* ((rawmsg (receive-message zmq-socket)) + (params (db:string->obj rawmsg)) ;; (with-input-from-string rawmsg (lambda ()(deserialize)))) + (res #f)) + (debug:print-info 12 "server=> received params=" params) + (set! res (cdb:cached-access params)) + (debug:print-info 12 "server=> processed res=" res) + (send-message zmq-socket (db:obj->string res)) + (loop))))))) + +;; run server:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (server:keep-running db) ;; if none running or if > 20 seconds since ;; server last used then start shutdown - (let loop ((count 0)) + (let loop () (thread-sleep! 20) ;; no need to do this very often (let ((numrunning (db:get-count-tests-running db))) (if (or (> numrunning 0) (> (+ *last-db-access* 60)(current-seconds))) (begin (debug:print-info 0 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) - (loop (+ 1 count))) + (loop)) (begin (debug:print-info 0 "Starting to shutdown the server side") ;; need to delete only *my* server entry (future use) - (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER' AND val like ?;" host:port) + (db:del-var db "SERVER") (thread-sleep! 10) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") ;; (exit))) ))))) -(define (server:find-free-port-and-open port) - (handle-exceptions - exn - (begin - (print "Failed to bind to port " (rpc:default-server-port) ", trying next port") - (server:find-free-port-and-open (+ port 1))) - (rpc:default-server-port port) - (tcp-read-timeout 240000) - (tcp-listen (rpc:default-server-port) 10000))) +(define (server:find-free-port-and-open host s port) + (let ((s (if s s (make-socket 'rep))) + (p (if (number? port) port 5555))) + (handle-exceptions + exn + (begin + (debug:print 0 "Failed to bind to port " p ", trying next port") + (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) + (server:find-free-port-and-open host s (+ p 1))) + (let ((zmq-url (conc "tcp://" host ":" p))) + (print "Trying to start server on " zmq-url) + (bind-socket s zmq-url) + (set! *runremote* #f) + (debug:print 0 "Server started on " zmq-url) + (open-run-close db:set-var #f "SERVER" zmq-url) + s)))) (define (server:client-setup) - (if *runremote* - (begin - (debug:print 0 "ERROR: Attempt to connect to server but already connected") - #f) - (let* ((hostinfo (open-run-close db:get-var #f "SERVER")) - (hostdat (if hostinfo (string-split hostinfo ":") #f)) - (host (if hostinfo (car hostdat) #f)) - (port (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f))) - (if (and port - (string->number port)) - (let ((portn (string->number port))) - (debug:print-info 2 "Setting up to connect to host " host ":" port) - (handle-exceptions - exn - (begin - (debug:print 0 "ERROR: Failed to open a connection to the server at host: " host " port: " port) - (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - ;; (open-run-close - ;; (lambda (db . param) - ;; (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'")) - ;; #f) - (set! *runremote* #f)) - (if (and (not (args:get-arg "-server")) ;; no point in the server using the server using the server - ((rpc:procedure 'server:login host portn) *toppath*)) - (begin - (debug:print-info 2 "Logged in and connected to " host ":" port) - (set! *runremote* (vector host portn))) - (begin - (debug:print-info 2 "Failed to login or connect to " host ":" port) - (set! *runremote* #f))))) - (debug:print-info 2 "no server available"))))) + (let* ((hostinfo (open-run-close db:get-var #f "SERVER")) + (zmq-socket (make-socket 'req))) + (if hostinfo + (begin + (debug:print-info 2 "Setting up to connect to " hostinfo) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo) + (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " perhaps jobs killed with -9? Removing server records") + (open-run-close db:del-var #f "SERVER") + (exit) + #f) + (let ((connect-ok #f)) + (connect-socket zmq-socket hostinfo) + (set! connect-ok (cdb:client-call zmq-socket 'login #t *toppath*)) + (if connect-ok + (begin + (debug:print-info 2 "Logged in and connected to " hostinfo) + (set! *runremote* zmq-socket) + #t) + (begin + (debug:print-info 2 "Failed to login or connect to " hostinfo) + (set! *runremote* #f) + #f))))) + (begin + (debug:print-info 2 "No server available, attempting to start one...") + (system (conc "megatest -server - " (if (args:get-arg "-debug") + (conc "-debug " (args:get-arg "-debug")) + "") + " &")) + (sleep 5) + (server:client-setup))))) + Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -218,11 +218,11 @@ (if waived (set! real-status "WAIVED")) (debug:print 4 "real-status " real-status ", waived " waived ", status " status) ;; update the primary record IF state AND status are defined (if (and state status) - (rdb:test-set-status-state test-id real-status state #f)) + (cdb:test-set-status-state *runremote* test-id real-status state #f)) ;; if status is "AUTO" then call rollup (note, this one modifies data in test ;; run area, do not rpc it (yet) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup #f test-id status)) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -65,11 +65,12 @@ cd ..;make install rm -f fullrun/logging.db touch cleanprep fullprep : cleanprep - cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt % -itempatt % + cd fullrun;$(MEGATEST) -server - & + sleep 5;cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt % -itempatt % cd fullrun;$(BINPATH)/dboard -rows 15 & dashboard : cleanprep cd fullrun && $(BINPATH)/dashboard -rows 25 & Index: testzmq/hwclient.scm ================================================================== --- testzmq/hwclient.scm +++ testzmq/hwclient.scm @@ -1,9 +1,9 @@ (use zmq posix) (define s (make-socket 'req)) -(connect-socket s "tcp://127.0.0.1:5563") +(connect-socket s "tcp://*:5563") (define myname (cadr (argv))) (print "Start client...") Index: testzmq/hwserver.scm ================================================================== --- testzmq/hwserver.scm +++ testzmq/hwserver.scm @@ -1,15 +1,15 @@ (use zmq srfi-18 posix) (define s (make-socket 'rep)) -(bind-socket s "tcp://127.0.0.1:5563") +(bind-socket s "tcp://*:5563") (print "Start server...") (let loop () (let* ((msg (receive-message s)) (name (caddr (string-split msg " "))) (resp (conc "World " name))) (print "Received request: [" msg "]") - (thread-sleep! 0.01) + (thread-sleep! 0.0001) (print "Sending response \"" resp "\"") (send-message s resp) (loop))) Index: utils/installall.sh ================================================================== --- utils/installall.sh +++ utils/installall.sh @@ -234,10 +234,13 @@ --disable-schedutils \ --disable-libblkid \ --disable-wall make install +# --disable-makeinstall-chown \ +# --disable-makeinstall-setuid \ + # --disable-chsh-only-listed # --disable-pg-bell let pg not ring the bell on invalid keys # --disable-require-password # --disable-use-tty-group do not install wall and write setgid tty # --disable-makeinstall-chown