Index: dashboard-guimonitor.scm ================================================================== --- dashboard-guimonitor.scm +++ dashboard-guimonitor.scm @@ -179,11 +179,11 @@ ;; (print "On-exit called") (tasks:remove-monitor-record tdb) (sqlite3:finalize! tdb)))) (define (gui-monitor db) - (let ((keys (rdb:get-keys db)) + (let ((keys (db:get-keys db)) (tdb (tasks:open-db))) (tasks:register-monitor db tdb) ;;; let the other monitors know we are here (control-panel db tdb keys) ;(tasks:remove-monitor-record db) ;(sqlite3:finalize! db) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -70,36 +70,32 @@ db)) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) - (let* ((db (if idb idb (open-db))) + (let* ((db (if idb + (if (procedure? idb) + (idb) + idb) + (open-db))) (res #f)) (set! res (apply proc db params)) (if (not idb)(sqlite3:finalize! db)) (debug:print-info 11 "open-run-close-no-exception-handling END" ) res)) (define (open-run-close-exception-handling proc idb . params) - (debug:print-info 11 "open-run-close-exception-handling START, idb=" idb ", params=" params) - (let ((runner (lambda () - (let* ((db (if idb idb (open-db))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! db)) - (debug:print-info 11 "open-run-close-no-exception-handling END" ) - res)))) - (handle-exceptions - exn - (begin - (debug:print 0 "EXCEPTION: database probably overloaded?") - (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain) - (thread-sleep! (random 120)) - (debug:print-info 0 "trying db call one more time....") - (runner)) - (runner)))) + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: database probably overloaded?") + (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain) + (thread-sleep! (random 120)) + (debug:print-info 0 "trying db call one more time....") + (apply open-run-close-no-exception-handling proc idb params)) + (apply open-run-close-no-exception-handling proc idb params))) (define open-run-close open-run-close-exception-handling) (define *global-delta* 0) (define *last-global-delta-printed* 0) @@ -983,15 +979,18 @@ "SELECT rundir FROM tests WHERE id=?;" test-id) (hash-table-set! *test-paths* test-id res) res)))) -(define (db:test-set-log! db test-id logf) - (if (string? logf) - (sqlite3:execute db "UPDATE tests SET final_logf=? WHERE id=?;" - logf test-id) - (debug:print 0 "ERROR: db:test-set-log! called with non-string log file name " logf))) +(define (cdb:test-set-log! zmqsocket test-id logf) + (if (string? logf)(cdb:client-call zmqsocket 'test-set-log #t test-id logf))) + +;; (define (db:test-set-log! db test-id logf) +;; (if (string? logf) +;; (sqlite3:execute db "UPDATE tests SET final_logf=? WHERE id=?;" +;; logf test-id) +;; (debug:print 0 "ERROR: db:test-set-log! called with non-string log file name " logf))) ;;====================================================================== ;; Misc. test related queries ;;====================================================================== @@ -1197,11 +1196,12 @@ (rollup-tests-pass-fail "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'), pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED')) - WHERE run_id=? AND testname=? AND item_path='';"))) + WHERE run_id=? AND testname=? AND item_path='';") + (test-set-log "UPDATE tests SET final_logf=? WHERE id=?;"))) (define db:special-queries '(rollup-tests-pass-fail)) (define db:run-local-queries '(rollup-tests-pass-fail)) ;; The queue is a list of vectors where the zeroth slot indicates the type of query to @@ -1232,11 +1232,11 @@ (stmts data)) (if special-qry ;; handle a query that cannot be part of the grouped queries (let* ((stmt-key (vector-ref special-qry 0)) (qry (hash-table-ref queries stmt-key)) - (params (vector-ref speical-qry 2))) + (params (vector-ref special-qry 2))) (apply sqlite3:execute db qry params) (if (not (null? stmts)) (outerloop #f stmts))) ;; handle normal queries (sqlite3:with-transaction @@ -1730,54 +1730,5 @@ results) ;; brutal clean up (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") - - -;;====================================================================== -;; REMOTE DB ACCESS VIA RPC -;;====================================================================== - -;; (define (rdb:test-set-status-state test-id status state msg) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 "EXCEPTION: rpc call failed?") -;; (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) -;; (print-call-chain) -;; (cdb:test-set-status-state test-id status state msg)) -;; ((rpc:procedure 'cdb:test-set-status-state host port) test-id status state msg))) -;; (cdb:test-set-status-state test-id status state msg))) -;; -;; (define (rdb:test-rollup-test_data-pass-fail test-id) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:test-rollup-test_data-pass-fail host port) test-id)) -;; (cdb:test-rollup-test_data-pass-fail test-id))) -;; -;; (define (rdb:pass-fail-counts test-id fail-count pass-count) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:pass-fail-counts host port) test-id fail-count pass-count)) -;; (cdb:pass-fail-counts test-id fail-count pass-count))) -;; -;; ;; currently forces a flush of the queue -;; (define (rdb:tests-register-test db run-id test-name item-path) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:tests-register-test host port) db run-id test-name item-path force-write: #t)) -;; (cdb:tests-register-test db run-id test-name item-path force-write: #t))) -;; -;; (define (rdb:flush-queue) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:flush-queue host port))) -;; (cdb:flush-queue))) -;; Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -208,11 +208,11 @@ (let ((exinfo (vector-ref exit-info 2)) (logfna (if logpro-used (conc stepname ".html") ""))) ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect) (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) (if logpro-used - (open-run-close db:test-set-log! #f test-id (conc stepname ".html"))) + (cdb:test-set-log! *runremote* test-id (conc stepname ".html"))) ;; set the test final status (let* ((this-step-status (cond ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) ((eq? (vector-ref exit-info 2) 0) 'pass) (else 'fail))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -257,12 +257,22 @@ ;;====================================================================== ;; Start the server - can be done in conjunction with -runall or -runtests (one day...) ;; we start the server if not running else start the client thread ;;====================================================================== + (if (args:get-arg "-server") - (server:launch) + (server:launch)) + +(if (or (let ((res #f)) + (for-each + (lambda (key) + (if (args:get-arg key)(set! res #t))) + (list "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")) + res) + (eq? (length (hash-table-keys args:arg-hash)) 0)) + (debug:print-info 1 "No server needed") (server:client-launch)) ;;====================================================================== ;; Remove old run(s) ;;====================================================================== @@ -673,11 +683,11 @@ (if (args:get-arg "-load-test-data") ;; has sub commands that are rdb: (open-run-close db:load-test-data db test-id)) (if (args:get-arg "-setlog") (let ((logfname (args:get-arg "-setlog"))) - (open-run-close db:test-set-log! db test-id logfname))) + (cdb:test-set-log! *runremote* #t test-id logfname))) (if (args:get-arg "-set-toplog") (open-run-close tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog"))) (if (args:get-arg "-summarize-items") (open-run-close tests:summarize-items db run-id test-name #t)) ;; do force here (if (args:get-arg "-runstep") @@ -716,11 +726,11 @@ (debug:print-info 2 "running \"" cmd "\"") (change-directory startingdir) (set! exitstat (system cmd)) (set! *globalexitstatus* exitstat) ;; no necessary (change-directory testpath) - (open-run-close db:test-set-log! db test-id htmllogfile))) + (cdb:test-set-log! *runremote* #t test-id htmllogfile))) (let ((msg (args:get-arg "-m"))) (open-run-close db:teststep-set-status! db test-id stepname "end" exitstat msg logfile)) ))) (if (or (args:get-arg "-test-status") (args:get-arg "-set-values")) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -17,44 +17,54 @@ (declare (unit server)) (declare (uses common)) (declare (uses db)) (declare (uses tests)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (include "common_records.scm") (include "db_records.scm") +(define (server:make-server-url hostport) + (if (not hostport) + #f + (conc "tcp://" (car hostport) ":" (cadr hostport)))) + (define (server:run hostn) (debug:print 0 "Attempting to start the server ...") - (let ((host:port (open-run-close db:get-var #f "SERVER"))) ;; do whe already have a server running? + (if (not *toppath*)(setup-for-run)) + (let* ((hostport (open-run-close tasks:get-best-server tasks:open-db)) ;; do whe already have a server running? + (host:port (server:make-server-url hostport))) (if host:port (begin (debug:print 0 "NOTE: server already running.") (if (server:client-setup) (begin - (debug:print-info 0 "Server is alive, not starting another") - ;;(exit) - ) + (debug:print-info 0 "Server is alive, not starting another")) (begin - (debug:print-info 0 "Server is dead, removing flag and trying again") - (open-run-close db:del-var #f "SERVER") - (server:run hostn)))) + (debug:print-info 0 "Server is dead, deregistering it, please try again") + (open-run-close tasks:server-deregister tasks:open-db (car hostport) port: (cadr port)) + ;; (server:run hostn) + (debug:print 0 "WOULD NORMALLY START ANOTHER SERVER HERE") + ) + ) + ) (let* ((zmq-socket #f) (hostname (if (string=? "-" hostn) (get-host-name) hostn)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") #f))) (if ipstr ipstr hostname)))) - (set! zmq-socket (server:find-free-port-and-open ipaddrstr zmq-socket 5555)) + (set! zmq-socket (server:find-free-port-and-open ipaddrstr zmq-socket 5555 0)) (set! *cache-on* #t) ;; what to do when we quit ;; (on-exit (lambda () - (open-run-close db:del-var #f "SERVER") + (open-run-close tasks:server-deregister-self tasks:open-db) (let loop () (let ((queue-len 0)) (thread-sleep! (random 5)) (mutex-lock! *incoming-mutex*) (set! queue-len (length *incoming-data*)) @@ -101,29 +111,33 @@ (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") ;; (exit))) )))))) -(define (server:find-free-port-and-open host s port) +(define (server:find-free-port-and-open host s port trynum) (let ((s (if s s (make-socket 'rep))) (p (if (number? port) port 5555))) (handle-exceptions exn (begin (debug:print 0 "Failed to bind to port " p ", trying next port") (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - (server:find-free-port-and-open host s (+ p 1))) + (if (< trynum 100) + (server:find-free-port-and-open host s (+ p 1) (+ trynum 1)) + (debug:print-info 0 "Tried ports from " (- p trynum) " to " p + " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))) (let ((zmq-url (conc "tcp://" host ":" p))) (print "Trying to start server on " zmq-url) (bind-socket s zmq-url) (set! *runremote* #f) (debug:print 0 "Server started on " zmq-url) - (open-run-close db:set-var #f "SERVER" zmq-url) + (open-run-close tasks:server-register tasks:open-db (current-process-id) host p 0 'live) s)))) (define (server:client-setup) - (let* ((hostinfo (open-run-close db:get-var #f "SERVER")) + (if (not *toppath*)(setup-for-run)) + (let* ((hostinfo (open-run-close tasks:get-best-server tasks:open-db)) (zmq-socket (make-socket 'req))) (if hostinfo (begin (debug:print-info 2 "Setting up to connect to " hostinfo) (handle-exceptions @@ -130,33 +144,36 @@ exn (begin (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo) (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " perhaps jobs killed with -9? Removing server records") - (open-run-close db:del-var #f "SERVER") - (exit) + (open-run-close tasks:server-deregister tasks:open-db (car hostinfo) port: (cadr hostinfo)) + ;; (exit) ;; why forced exit? #f) - (let ((connect-ok #f)) - (connect-socket zmq-socket hostinfo) + (let ((connect-ok #f) + (conurl (server:make-server-url hostinfo))) + (connect-socket zmq-socket conurl) (set! connect-ok (cdb:client-call zmq-socket 'login #t *toppath*)) (if connect-ok (begin - (debug:print-info 2 "Logged in and connected to " hostinfo) + (debug:print-info 2 "Logged in and connected to " conurl) (set! *runremote* zmq-socket) #t) (begin - (debug:print-info 2 "Failed to login or connect to " hostinfo) + (debug:print-info 2 "Failed to login or connect to " conurl) (set! *runremote* #f) #f))))) (begin - (debug:print-info 2 "No server available, attempting to start one...") - (system (conc "megatest -server - " (if (args:get-arg "-debug") - (conc "-debug " (args:get-arg "-debug")) - "") - " &")) - (sleep 5) - (server:client-setup))))) + (debug:print-info 0 "NO SERVER RUNNING! PLEASE START ONE! E.g. \"megatest -server - &\"") + ;; (debug:print-info 2 "No server available, attempting to start one...") + ;; (system (conc (car (argv)) " -server - " (if (args:get-arg "-debug") + ;; (conc "-debug " (args:get-arg "-debug")) + ;; "") + ;; " &")) + ;; (sleep 5) + ;; (server:client-setup) + )))) (define (server:launch) (let* ((toppath (setup-for-run))) (debug:print-info 0 "Starting the standalone server") (if *toppath* Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -23,16 +23,16 @@ ;;====================================================================== (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) - (tdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) - (sqlite3:set-busy-handler! tdb handler) + (sqlite3:set-busy-handler! mdb handler) (if (not exists) (begin - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, state TEXT DEFAULT 'new', target TEXT DEFAULT '', name TEXT DEFAULT '', @@ -40,19 +40,96 @@ item TEXT DEFAULT '', keylock TEXT, params TEXT, creation_time TIMESTAMP, execution_time TIMESTAMP);") - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, pid INTEGER, start_time TIMESTAMP, last_update TIMESTAMP, hostname TEXT, username TEXT, - CONSTRAINT monitors_constraint UNIQUE (pid,hostname));"))) - tdb)) + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + pid INTEGER, + hostname TEXT, + port INTEGER, + start_time TIMESTAMP, + priority INTEGER, + state TEXT, + CONSTRAINT servers_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, + server_id INTEGER, + pid INTEGER, + hostname TEXT, + cmdline TEXT, + login_time TIMESTAMP, + logout_time TIMESTAMP DEFAULT -1, + CONSTRAINT clients_constraint UNIQUE (pid,hostname));") + + )) + mdb)) +;;====================================================================== +;; Server and client management +;;====================================================================== + +;; state: 'live, 'shutting-down, 'dead +(define (tasks:server-register mdb pid hostname port priority state) + (sqlite3:execute + mdb + "INSERT OR REPLACE INTO servers (pid,hostname,port,start_time,priority,state) VALUES(?,?,?,strftime('%s','now'),?,?);" + pid hostname port priority (conc state))) + +(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)) + (if pid + (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" hostname pid) + (if port + (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port) + (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) + +(define (tasks:server-deregister-self mdb) + (tasks:server-deregister mdb pid: (current-process-id) (get-host-name))) + +(define (tasks:server-get-server-id mdb) + ;; dunno yet + 0) + +(define (tasks:client-register mdb pid hostname cmdline) + (sqlite3:execute + mdb + "INSERT OR REPLACE INTO clients (server_id,pid,hostname,cmdline,login_time) VALUES(?,?,?,?,strftime('%s','now'));") + (tasks:server-get-server-id mdb) + pid hostname cmdline) + +(define (tasks:client-logout mdb pid hostname cmdline) + (sqlite3:execute + mdb + "UPDATE clients SET logout_time=strftime('%s','now') WHERE pid=? AND hostname=? AND cmdline=?;" + pid hostname cmdline)) + +(define (tasks:get-logged-in-clients mdb server-id) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id server-id pid hostname cmdline login-time logout-time) + (set! res (cons (vector id server-id pid hostname cmdline login-time lougout-time) res))) + mdb + "SELECT id,server_id,pid,hostname,cmdline,login_time,logout_time FROM clients WHERE server_id=?;" + server-id))) + +(define (tasks:have-clients? mdb server-id) + (null? (tasks:get-logged-in-clients mdb server-id))) + +(define (tasks:get-best-server mdb) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (id hostname port) + (set! res (list hostname port))) + mdb + "SELECT id,hostname,port FROM servers WHERE state='live' ORDER BY start_time DESC LIMIT 1;") + res)) + ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== @@ -65,32 +142,32 @@ ;;====================================================================== ;; Task Monitors ;;====================================================================== -(define (tasks:register-monitor db tdb) +(define (tasks:register-monitor db mdb) (let* ((pid (current-process-id)) (hostname (get-host-name)) (userinfo (user-information (current-user-id))) (username (car userinfo))) (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username) - (sqlite3:execute tdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" + (sqlite3:execute mdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" pid hostname username))) -(define (tasks:get-num-alive-monitors tdb) +(define (tasks:get-num-alive-monitors mdb) (let ((res 0)) (sqlite3:for-each-row (lambda (count) (set! res count)) - tdb + mdb "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" (car (user-information (current-user-id)))) res)) ;; register a task -(define (tasks:add tdb action owner target runname test item params) - (sqlite3:execute tdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) +(define (tasks:add mdb action owner target runname test item params) + (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) VALUES (?,?,'new',?,?,?,?,?,strftime('%s','now'),0);" action owner target runname @@ -105,28 +182,28 @@ (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) (cdr keys))) tmp)) ;; for use from the gui -(define (tasks:add-from-params tdb action keys key-params var-params) +(define (tasks:add-from-params mdb action keys key-params var-params) (let ((target (keys:key-vals-hash->target keys key-params)) (owner (car (user-information (current-user-id)))) (runname (hash-table-ref/default var-params "runname" #f)) (testpatts (hash-table-ref/default var-params "testpatts" "%")) (itempatts (hash-table-ref/default var-params "itempatts" "%")) (params (hash-table-ref/default var-params "params" ""))) - (tasks:add tdb action owner target runname testpatts itempatts params))) + (tasks:add mdb action owner target runname testpatts itempatts params))) ;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old ;; -(define (tasks:snag-a-task tdb) +(define (tasks:snag-a-task mdb) (let ((res #f) (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) ;; first randomly set a new to pid-hostname-hostname (sqlite3:execute - tdb + mdb "UPDATE tasks_queue SET keylock=? WHERE id IN (SELECT id FROM tasks_queue WHERE state='new' OR (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR state='reset' @@ -133,92 +210,92 @@ ORDER BY RANDOM() LIMIT 1);" keytxt) (sqlite3:for-each-row (lambda (id . rem) (set! res (apply vector id rem))) - tdb + mdb "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) (if res ;; yep, have work to be done (begin - (sqlite3:execute tdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" + (sqlite3:execute mdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" (tasks:task-get-id res)) res) #f))) -(define (tasks:reset-stuck-tasks tdb) +(define (tasks:reset-stuck-tasks mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id delta) (set! res (cons id res))) - tdb + mdb "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") (sqlite3:execute - tdb + mdb (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');")))) ;; return all tasks in the tasks_queue table ;; -(define (tasks:get-tasks tdb types states) +(define (tasks:get-tasks mdb types states) (let ((res '())) (sqlite3:for-each-row (lambda (id . rem) (set! res (cons (apply vector id rem) res))) - tdb + mdb (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue " ;; WHERE ;; state IN " statesstr " AND ;; action IN " actionsstr " ORDER BY creation_time DESC;")) res)) ;; remove tasks given by a string of numbers comma separated -(define (tasks:remove-queue-entries tdb task-ids) - (sqlite3:execute tdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) +(define (tasks:remove-queue-entries mdb task-ids) + (sqlite3:execute mdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) ;; -(define (tasks:start-monitor db tdb) - (if (> (tasks:get-num-alive-monitors tdb) 2) ;; have two running, no need for more +(define (tasks:start-monitor db mdb) + (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more (debug:print-info 1 "Not starting monitor, already have more than two running") (let* ((megatestdb (conc *toppath* "/megatest.db")) (monitordbf (conc *toppath* "/monitor.db")) (last-db-update 0)) ;; (file-modification-time megatestdb))) - (task:register-monitor tdb) + (task:register-monitor mdb) (let loop ((count 0) (next-touch 0)) ;; next-touch is the time where we need to update last_update ;; if the db has been modified we'd best look at the task queue (let ((modtime (file-modification-time megatestdbpath ))) (if (> modtime last-db-update) - (tasks:process-queue db tdb last-db-update megatestdb next-touch)) + (tasks:process-queue db mdb last-db-update megatestdb next-touch)) ;; WARNING: Possible race conditon here!! ;; should this update be immediately after the task-get-action call above? (if (> (current-seconds) next-touch) (begin - (tasks:monitors-update tdb) + (tasks:monitors-update mdb) (loop (+ count 1)(+ (current-seconds) 240))) (loop (+ count 1) next-touch))))))) -(define (tasks:process-queue db tdb) - (let* ((task (tasks:snag-a-task tdb)) +(define (tasks:process-queue db mdb) + (let* ((task (tasks:snag-a-task mdb)) (action (if task (tasks:task-get-action task) #f))) (if action (print "tasks:process-queue task: " task)) (if action (case (string->symbol action) - ((run) (tasks:start-run db tdb task)) - ((remove) (tasks:remove-runs db tdb task)) - ((lock) (tasks:lock-runs db tdb task)) + ((run) (tasks:start-run db mdb task)) + ((remove) (tasks:remove-runs db mdb task)) + ((lock) (tasks:lock-runs db mdb task)) ;; ((monitor) (tasks:start-monitor db task)) - ((rollup) (tasks:rollup-runs db tdb task)) - ((updatemeta)(tasks:update-meta db tdb task)) - ((kill) (tasks:kill-monitors db tdb task)))))) + ((rollup) (tasks:rollup-runs db mdb task)) + ((updatemeta)(tasks:update-meta db mdb task)) + ((kill) (tasks:kill-monitors db mdb task)))))) -(define (tasks:get-monitors tdb) +(define (tasks:get-monitors mdb) (let ((res '())) (sqlite3:for-each-row (lambda (a . rem) (set! res (cons (apply vector a rem) res))) - tdb + mdb "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") (reverse res) )) (define (tasks:tasks->text tasks) @@ -253,31 +330,31 @@ monitors) "\n")))) ;; update the last_update field with the current time and ;; if any monitors appear dead, remove them -(define (tasks:monitors-update tdb) - (sqlite3:execute tdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" +(define (tasks:monitors-update mdb) + (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name)) (let ((deadlist '())) (sqlite3:for-each-row (lambda (id pid host last-update delta) (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") (set! deadlist (cons id deadlist))) - tdb + mdb "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") - (sqlite3:execute tdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) ) -(define (tasks:remove-monitor-record tdb) - (sqlite3:execute tdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" +(define (tasks:remove-monitor-record mdb) + (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name))) -(define (tasks:set-state tdb task-id state) - (sqlite3:execute tdb "UPDATE tasks_queue SET state=? WHERE id=?;" +(define (tasks:set-state mdb task-id state) + (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" state task-id)) ;;====================================================================== ;; The routines to process tasks @@ -284,11 +361,11 @@ ;;====================================================================== ;; NOTE: It might be good to add one more layer of checking to ensure ;; that no task gets run in parallel. -(define (tasks:start-run db tdb task) +(define (tasks:start-run db mdb task) (let ((flags (make-hash-table))) (hash-table-set! flags "-rerun" "NOT_STARTED") (if (not (string=? (tasks:task-get-params task) "")) (hash-table-set! flags "-setvars" (tasks:task-get-params task))) (print "Starting run " task) @@ -298,13 +375,13 @@ (tasks:task-get-name task) (tasks:task-get-test task) (tasks:task-get-item task) (tasks:task-get-owner task) flags) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) -(define (tasks:rollup-runs db tdb task) +(define (tasks:rollup-runs db mdb task) (let* ((flags (make-hash-table)) (keys (db:get-keys db)) (keyvallst (keys:target->keyval keys (tasks:task-get-target task)))) ;; (hash-table-set! flags "-rerun" "NOT_STARTED") (print "Starting rollup " task) @@ -312,6 +389,6 @@ (runs:rollup-run db keys keyvallst (tasks:task-get-name task) (tasks:task-get-owner task)) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -18,10 +18,13 @@ NEWTARGET = "-target $(OS)/$(FS)/$(VER)" TARGET = "-target ubuntu/nfs/none" all : test1 test2 test3 test4 test5 +test0 : cleanprep + cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG)& + test1 : cleanprep rm -f simplerun/megatest.db rm -rf simplelinks/ simpleruns/ mkdir -p simplelinks simpleruns cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm @@ -48,12 +51,12 @@ cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & test6: fullprep cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 @@ -61,11 +64,11 @@ cleanprep : ../*.scm Makefile */*.config # if [ -e fullrun/megatest.db ]; then sqlite3 fullrun/megatest.db "delete from metadat where var='SERVER';";fi mkdir -p /tmp/mt_runs /tmp/mt_links cd ..;make install - rm -f fullrun/logging.db + rm -f */logging.db */monitor.db touch cleanprep fullprep : cleanprep cd fullrun;$(MEGATEST) -server - -debug $(DEBUG) & sleep 5;cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% @@ -81,9 +84,12 @@ rm cleanprep kill : killall -v mtest main.sh dboard || true rm -f fullrun/megatest.db fullrun/logging.db || true - killall -v -9 mtest dboard || true + killall -v mtest dboard || true + +hardkill : kill + sleep 5;killall -v mtest main.sh dboard -9 runforever : while(ls); do runname=`date +%F-%R:%S`;(cd fullrun;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname);done Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -1,7 +1,22 @@ +;; Copyright 2006-2012, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + (require-extension test) (require-extension regex) +(require-extension srfi-18) +(import srfi-18) +(require-extension zmq) +(import zmq) (define test-work-dir (current-directory)) ;; read in all the _record files (let ((files (glob "*_records.scm"))) @@ -8,10 +23,12 @@ (for-each (lambda (file) (print "Loading " file) (load file)) files)) + +(define *runremote* #f) ;;====================================================================== ;; P R O C E S S E S ;;====================================================================== @@ -53,13 +70,38 @@ ;; test:match->sqlqry (test #f "(testname GLOB 'a' AND item_path GLOB 'b') OR (testname LIKE 'a%' AND item_path LIKE '%') OR (testname GLOB '' AND item_path LIKE 'b%')" (tests:match->sqlqry "a/b,a%,/b%")) (test #f "(testname GLOB 'a' AND item_path GLOB 'b') OR (testname LIKE 'a%' AND item_path LIKE '%') OR (testname LIKE '%' AND item_path LIKE 'b%')" (tests:match->sqlqry "a/b,a%,%/b%")) + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +(test "setup for run" #t (begin (setup-for-run) + (string? (getenv "MT_RUN_AREA_HOME")))) + +(test "server-register, get-best-server" '("bob" 1234) (let ((res #f)) + (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live) + (set! res (open-run-close tasks:get-best-server tasks:open-db)) + res)) +(test "de-register server" #f (let ((res #f)) + (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234) + (open-run-close tasks:get-best-server tasks:open-db))) + ;; (exit) +(set! *verbosity* 10) +(define server-pid (process-run "../../bin/megatest" (list "-server" "-" "-debug" (conc *verbosity*)))) +(sleep 3) + +(define th1 (make-thread (lambda ()(server:client-setup)))) +(thread-start! th1) + +(test #f #t (socket? *runremote*)) + ;;====================================================================== ;; C O N F I G F I L E S ;;====================================================================== (define conffile #f) @@ -78,12 +120,10 @@ (define header (list "col1" "col2" "col3" "col4")) (test "Get row by header" "blah" (db:get-value-by-header row header "col4")) ;; (define *toppath* "tests") (define *db* #f) -(test "setup for run" #t (begin (setup-for-run) - (string? (getenv "MT_RUN_AREA_HOME")))) (test "open-db" #t (begin (set! *db* (open-db)) (if *db* #t #f))) ;; quit wasting time, I'm changing *db* to db @@ -112,11 +152,11 @@ (test "get all legal tests" (list "test1" "test2") (sort (get-all-legal-tests) string<=?)) (test "register-test, test info" "NOT_STARTED" (begin - (cdb:tests-register-test *remoterun* 1 "nada" "") + (cdb:tests-register-test *runremote* 1 "nada" "") ;; (rdb:flush-queue) (vector-ref (db:get-test-info *db* 1 "nada" "") 3))) (test #f "NOT_STARTED" (begin @@ -253,24 +293,16 @@ ;;====================================================================== ;; R E M O T E C A L L S ;;====================================================================== -;; start a server process -(set! *verbosity* 10) -;; (define server-pid (process-run "../../bin/megatest" (list "-server" "-" "-debug" (conc *verbosity*)))) -;; (sleep 2) - -(define th1 (make-thread server:launch)) -(thread-start! th1) - (define start-wait (current-seconds)) (server:client-setup) (print "Starting intensive cache and rpc test") (for-each (lambda (params) ;;; (rdb:tests-register-test #f 1 (conc "test" (random 20)) "") - (apply cdb:test-set-status-state *remoterun* test-id params) + (apply cdb:test-set-status-state *runremote* test-id params) (rdb:pass-fail-counts test-id (random 100) (random 100)) (rdb:test-rollup-test_data-pass-fail test-id) (thread-sleep! 0.01)) ;; cache ordering granularity is at the second level. Should really be at the ms level '(("COMPLETED" "PASS" #f) ("NOT_STARTED" "FAIL" "Just testing") @@ -332,8 +364,10 @@ #t)) (hash-table-set! args:arg-hash ":runname" "%") (test "Remove the rollup run" #t (begin (operate-on 'remove-runs))) + +(thread-join! th1 th2 th3) ;; ADD ME!!!! (db:get-prereqs-not-met *db* 1 '("runfirst") "" mode: 'normal) ;; ADD ME!!!! (rdb:get-tests-for-run *db* 1 "runfirst" #f '() '())