Overview
Comment: | Added check for version on client/server login. Converted to looking at heartbeat time instead of trying to connect to server |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | 1.5103 |
Files: | files | file ages | folders |
SHA1: |
af929ed4d8b9318358200b8bdfcdf69e |
User & Date: | matt on 2012-11-02 00:28:58 |
Other Links: | manifest | tags |
Context
2012-11-02
| ||
11:57 | Cleaned up the checks for being in a megatest area, ensure all exit correctly check-in: 52a15efc23 user: mrwellan tags: trunk, v1.5104 | |
00:28 | Added check for version on client/server login. Converted to looking at heartbeat time instead of trying to connect to server check-in: af929ed4d8 user: matt tags: trunk, 1.5103 | |
2012-11-01
| ||
14:08 | Some tweaks to accomodate long running server (lives for 48 hrs after last access) check-in: c9858f0dcb user: mrwellan tags: trunk, v1.5102 | |
Changes
Modified common.scm from [7a887c91b9] to [fed65ad912].
︙ | ︙ | |||
44 45 46 47 48 49 50 51 52 53 54 55 56 57 | (define *my-client-signature* #f) (define *rpc:listener* #f) ;; if set up for server communication this will hold the tcp port (define *runremote* #f) ;; if set up for server communication this will hold <host port> (define *last-db-access* (current-seconds)) ;; update when db is accessed via server (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *client-non-blocking-mode* #f) (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id | > > | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | (define *my-client-signature* #f) (define *rpc:listener* #f) ;; if set up for server communication this will hold the tcp port (define *runremote* #f) ;; if set up for server communication this will hold <host port> (define *last-db-access* (current-seconds)) ;; update when db is accessed via server (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *client-non-blocking-mode* #f) (define *server-id* #f) (define *time-to-exit* #f) (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id |
︙ | ︙ |
Modified db.scm from [c8003a0560] to [d14f94d1cd].
︙ | ︙ | |||
1102 1103 1104 1105 1106 1107 1108 | (cached? (cadr params)) (remparam (list-tail params 2))) (debug:print-info 12 "cdb:cached-access qry-name=" qry-name " params=" params) (if (not cached?)(db:write-cached-data)) ;; Any special calls are dispatched here. ;; Remainder are put in the db queue (case qry-name | | | | > | | > | 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 | (cached? (cadr params)) (remparam (list-tail params 2))) (debug:print-info 12 "cdb:cached-access qry-name=" qry-name " params=" params) (if (not cached?)(db:write-cached-data)) ;; Any special calls are dispatched here. ;; Remainder are put in the db queue (case qry-name ((login) ;; login checks that the megatest path and version matches (if (< (length remparam) 3) ;; should get toppath, version and signature '(#f "login failed due to missing params") ;; missing params (let ((calling-path (car remparam)) (calling-vers (cadr remparam)) (client-key (caddr remparam))) (if (and (equal? calling-path *toppath*) (equal? megatest-version calling-vers)) (begin (hash-table-set! *logged-in-clients* client-key (current-seconds)) '(#t "successful login")) ;; path matches - pass! Should vet the caller at this time ... (list #f (conc "Login failed due to mismatch paths: " calling-path ", " *toppath*)))))) ((logout) (if (and (> (length remparam) 1) (eq? *toppath* (car remparam)) |
︙ | ︙ | |||
1182 1183 1184 1185 1186 1187 1188 | (debug:print-info 11 "zmq-socket " (car params) " res=" res) res)) (define (cdb:set-verbosity zmq-socket val) (cdb:client-call zmq-socket 'set-verbosity #f val)) (define (cdb:login zmq-socket keyval signature) | | | 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 | (debug:print-info 11 "zmq-socket " (car params) " res=" res) res)) (define (cdb:set-verbosity zmq-socket val) (cdb:client-call zmq-socket 'set-verbosity #f val)) (define (cdb:login zmq-socket keyval signature) (cdb:client-call zmq-socket 'login #t keyval megatest-version signature)) (define (cdb:logout zmq-socket keyval signature) (cdb:client-call zmq-socket 'logout #t keyval signature)) (define (cdb:num-clients zmq-socket) (cdb:client-call zmq-socket 'numclients #t)) |
︙ | ︙ |
Modified megatest.scm from [140e7d9667] to [8dfdbfa547].
︙ | ︙ | |||
270 271 272 273 274 275 276 | (server:launch))) (if (or (args:get-arg "-listservers") (args:get-arg "-killserver")) (let ((tl (setup-for-run))) (if tl (let ((servers (open-run-close tasks:get-all-servers tasks:open-db)) | | | | | | < | 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | (server:launch))) (if (or (args:get-arg "-listservers") (args:get-arg "-killserver")) (let ((tl (setup-for-run))) (if tl (let ((servers (open-run-close tasks:get-all-servers tasks:open-db)) (fmtstr "~5a~8a~8a~20a~5a~20a~9a~20a\n") (servers-to-kill '())) (format #t fmtstr "Id" "MTver" "Pid" "Host" "Port" "Time" "Priority" "State") (format #t fmtstr "==" "=====" "===" "====" "====" "====" "========" "=====") (for-each (lambda (server) (let* ((killinfo (args:get-arg "-killserver")) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) (kpid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f)) (id (vector-ref server 0)) (pid (vector-ref server 1)) (hostname (vector-ref server 2)) (port (vector-ref server 3)) (start-time (vector-ref server 4)) (priority (vector-ref server 5)) (state (vector-ref server 6)) (mt-ver (vector-ref server 7)) (status (open-run-close tasks:server-alive? tasks:open-db hostname port: port)) (killed #f) (zmq-socket (if status (server:client-connect hostname port) #f))) ;; no need to login as status of #t indicates we are connecting to correct ;; server (if (or (not status) ;; no point in keeping dead records in the db (and khost-port ;; kill by host/port (equal? hostname (car khost-port)) |
︙ | ︙ | |||
314 315 316 317 318 319 320 | (equal? kpid pid)) ;;; YEP, ALL WITH PID WILL BE KILLED!!! (begin (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid) (set! killed #t) (if status (cdb:kill-server zmq-socket)) (debug:print-info 1 "Killed server by pid at " hostname ":" port))) ;; (if zmq-socket (close-socket zmq-socket)) | | | | 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 | (equal? kpid pid)) ;;; YEP, ALL WITH PID WILL BE KILLED!!! (begin (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid) (set! killed #t) (if status (cdb:kill-server zmq-socket)) (debug:print-info 1 "Killed server by pid at " hostname ":" port))) ;; (if zmq-socket (close-socket zmq-socket)) (format #t fmtstr id mt-ver pid hostname port start-time priority status))) servers) (debug:print-info 1 "Done with listservers") (exit) ;; must do, would have to add checks to many/all calls below (set! *didsomething* #t)))) ;; if not list or kill then start a client (if appropriate) (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test") (eq? (length (hash-table-keys args:arg-hash)) 0)) |
︙ | ︙ |
Modified server.scm from [06d840e03e] to [8939443630].
︙ | ︙ | |||
24 25 26 27 28 29 30 | (include "common_records.scm") (include "db_records.scm") (define (server:make-server-url hostport) (if (not hostport) #f (conc "tcp://" (car hostport) ":" (cadr hostport)))) | < | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | (include "common_records.scm") (include "db_records.scm") (define (server:make-server-url hostport) (if (not hostport) #f (conc "tcp://" (car hostport) ":" (cadr hostport)))) (define (server:run hostn) (debug:print 0 "Attempting to start the server ...") (if (not *toppath*)(setup-for-run)) (let* ((zmq-socket #f) (hostname (if (string=? "-" hostn) (get-host-name) |
︙ | ︙ | |||
83 84 85 86 87 88 89 | (define (server:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown (let loop ((count 0)) (thread-sleep! 3) ;; no need to do this very often (db:write-cached-data) ;; (print "Server running, count is " count) | | > | | 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | (define (server:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown (let loop ((count 0)) (thread-sleep! 3) ;; no need to do this very often (db:write-cached-data) ;; (print "Server running, count is " count) (if (< count 2) ;; 3x3 = 9 secs aprox (loop (+ count 1)) (let ((numrunning (open-run-close db:get-count-tests-running #f))) (open-run-close tasks:server-update-heartbeat tasks:open-db *server-id*) (if (or (> numrunning 0) ;; stay alive for two days after last access (> (+ *last-db-access* (* 48 60 60))(current-seconds))) (begin (debug:print-info 2 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) (open-run-close tasks:server-deregister-self tasks:open-db) (thread-sleep! 1) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") (exit))))))) (define (server:find-free-port-and-open host s port #!key (trynum 50)) (let ((s (if s s (make-socket 'rep))) |
︙ | ︙ | |||
118 119 120 121 122 123 124 | (debug:print-info 0 "Tried ports from " (- p trynum) " to " p " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))) (let ((zmq-url (conc "tcp://" host ":" p))) (print "Trying to start server on " zmq-url) (bind-socket s zmq-url) (set! *runremote* #f) (debug:print 0 "Server started on " zmq-url) | | | 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | (debug:print-info 0 "Tried ports from " (- p trynum) " to " p " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))) (let ((zmq-url (conc "tcp://" host ":" p))) (print "Trying to start server on " zmq-url) (bind-socket s zmq-url) (set! *runremote* #f) (debug:print 0 "Server started on " zmq-url) (set! *server-id* (open-run-close tasks:server-register tasks:open-db (current-process-id) host p 0 'live)) s)))) (define (server:mk-signature) (message-digest-string (md5-primitive) (with-output-to-string (lambda () (write (list (current-directory) |
︙ | ︙ | |||
164 165 166 167 168 169 170 | ;; Do all the connection work, start a server if not already running (define (server:client-setup #!key (numtries 10)(do-ping #f)) (if (not *toppath*)(setup-for-run)) (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db do-ping: do-ping))) (if hostinfo (let ((host (car hostinfo)) | | | | 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | ;; Do all the connection work, start a server if not already running (define (server:client-setup #!key (numtries 10)(do-ping #f)) (if (not *toppath*)(setup-for-run)) (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db do-ping: do-ping))) (if hostinfo (let ((host (car hostinfo)) (port (cadr hostinfo))) ;; (zsocket (caddr hostinfo))) ;; (set! *runremote* zsocket)) (let* ((host (car hostinfo)) (port (cadr hostinfo))) (debug:print-info 2 "Setting up to connect to " hostinfo) (handle-exceptions exn (begin |
︙ | ︙ | |||
219 220 221 222 223 224 225 | (thread-start! th3) (set! *didsomething* #t) (thread-join! th3)) (debug:print 0 "ERROR: Failed to setup for megatest")))))) (define (server:client-launch #!key (do-ping #f)) (if (server:client-setup do-ping: do-ping) | | | 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 | (thread-start! th3) (set! *didsomething* #t) (thread-join! th3)) (debug:print 0 "ERROR: Failed to setup for megatest")))))) (define (server:client-launch #!key (do-ping #f)) (if (server:client-setup do-ping: do-ping) (debug:print-info 2 "connected as client") (begin (debug:print 0 "ERROR: Failed to connect as client") (exit)))) ;; ping a server and return number of clients or #f (if no response) (define (server:ping host port #!key (secs 10)(return-socket #f)) (cdb:use-non-blocking-mode |
︙ | ︙ |
Modified tasks.scm from [6027d829c1] to [5ae2b507c5].
︙ | ︙ | |||
52 53 54 55 56 57 58 | (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, pid INTEGER, hostname TEXT, port INTEGER, start_time TIMESTAMP, priority INTEGER, state TEXT, | > > | | | > | > > > > > > > > > > | > > | > > > > > > > > > > > | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, pid INTEGER, hostname TEXT, port INTEGER, start_time TIMESTAMP, priority INTEGER, state TEXT, mt_version TEXT, heartbeat TIMESTAMP, CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, server_id INTEGER, pid INTEGER, hostname TEXT, cmdline TEXT, login_time TIMESTAMP, logout_time TIMESTAMP DEFAULT -1, CONSTRAINT clients_constraint UNIQUE (pid,hostname));") )) mdb)) ;;====================================================================== ;; Server and client management ;;====================================================================== ;; state: 'live, 'shutting-down, 'dead (define (tasks:server-register mdb pid hostname port priority state) (sqlite3:execute mdb "INSERT OR REPLACE INTO servers (pid,hostname,port,start_time,priority,state,mt_version,heartbeat) VALUES(?,?,?,strftime('%s','now'),?,?,?,strftime('%s','now'));" pid hostname port priority (conc state) megatest-version) (tasks:server-get-server-id mdb hostname port pid)) ;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used! (define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)) (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid) (if pid (sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid) (if port (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port) (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) (define (tasks:server-deregister-self mdb hostname) (tasks:server-deregister mdb hostname pid: (current-process-id))) (define (tasks:server-get-server-id mdb host port pid) (let ((res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) mdb (if (and host pid) "SELECT id FROM servers WHERE hostname=? AND pid=?;" "SELECT id FROM servers WHERE hostname=? AND port=?;") host (if pid pid port)) res)) (define (tasks:server-update-heartbeat mdb server-id) (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id)) ;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds (define (tasks:server-alive? mdb server-id #!key (hostname #f)(port #f)(pid #f)) (let* ((server-id (if server-id server-id (tasks:server-get-server-id mdb hostname port pid))) (heartbeat-delta 99e9)) (sqlite3:for-each-row (lambda (delta) (set! heartbeat-delta delta)) mdb "SELECT strftime('%s','now')-heartbeat FROM servers WHERE id=?;" server-id) (< (- (current-seconds) heartbeat-delta) 10))) (define (tasks:client-register mdb pid hostname cmdline) (sqlite3:execute mdb "INSERT OR REPLACE INTO clients (server_id,pid,hostname,cmdline,login_time) VALUES(?,?,?,?,strftime('%s','now'));") (tasks:server-get-server-id mdb) pid hostname cmdline) |
︙ | ︙ | |||
127 128 129 130 131 132 133 | (let ((res '()) (best #f)) (sqlite3:for-each-row (lambda (id hostname port) (set! res (cons (list hostname port) res)) (debug:print-info 1 "Found " hostname ":" port)) mdb | | | | | | > | | | | | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | (let ((res '()) (best #f)) (sqlite3:for-each-row (lambda (id hostname port) (set! res (cons (list hostname port) res)) (debug:print-info 1 "Found " hostname ":" port)) mdb "SELECT id,hostname,port FROM servers WHERE state='live' AND mt_version=? ORDER BY start_time DESC LIMIT 1;" megatest-version) ;; (print "res=" res) (if (null? res) #f (let loop ((hed (car res)) (tal (cdr res))) ;; (print "hed=" hed ", tal=" tal) (let* ((host (car hed)) (port (cadr hed)) ;; (ping-res (if do-ping (server:ping host port return-socket: #f) '(#t "NO PING" #f))) (alive (open-run-close tasks:server-alive? tasks:open-db host port: port)) ;; (car ping-res)) ;; (reason (cadr ping-res)) ;; (zsocket (caddr ping-res)) ) (if alive (list host port) ;; remove defunct server from table (begin (open-run-close tasks:server-deregister tasks:open-db host port: port) (if (null? tal) #f (loop (car tal)(cdr tal)))))))))) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname port start-time priority state mt-version) (set! res (cons (vector id pid hostname port start-time priority state mt-version) res))) mdb "SELECT id,pid,hostname,port,start_time,priority,state,mt_version FROM servers ORDER BY start_time DESC;") res)) ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== |
︙ | ︙ |
Modified tests/Makefile from [14232cd2a2] to [438ac58123].
︙ | ︙ | |||
83 84 85 86 87 88 89 90 91 | killall -v mtest main.sh dboard || true rm -f */megatest.db */logging.db */monitor.db || true killall -v mtest dboard || true hardkill : kill sleep 5;killall -v mtest main.sh dboard -9 runforever : while(ls); do runname=`date +%F-%R:%S`;(cd fullrun;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname);done | > > > | 83 84 85 86 87 88 89 90 91 92 93 94 | killall -v mtest main.sh dboard || true rm -f */megatest.db */logging.db */monitor.db || true killall -v mtest dboard || true hardkill : kill sleep 5;killall -v mtest main.sh dboard -9 listservers : cd fullrun;$(MEGATEST) -listservers runforever : while(ls); do runname=`date +%F-%R:%S`;(cd fullrun;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname);done |