Overview
Comment: | Hack to fix the sometimes won't work on some NFS servers problem |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.54 | v1.5416 |
Files: | files | file ages | folders |
SHA1: |
16119b4b9036adcdd57fbcfb0fe2d40c |
User & Date: | mrwellan on 2013-04-23 16:06:32 |
Other Links: | branch diff | manifest | tags |
Context
2013-04-25
| ||
00:06 | CHECK now rolls up as FAIL and SKIP rolls up as PASS. Fixed stats on server exit check-in: 08d6116a60 user: matt tags: v1.54 | |
2013-04-23
| ||
16:06 | Hack to fix the sometimes won't work on some NFS servers problem check-in: 16119b4b90 user: mrwellan tags: v1.54, v1.5416 | |
13:37 | Set transport type to fs if fail to connect to server check-in: f6ec489c16 user: mrwellan tags: v1.54 | |
Changes
Modified http-transport.scm from [b18abed8cd] to [ef46118e7f].
︙ | ︙ | |||
20 21 22 23 24 25 26 27 28 29 30 31 32 33 | (declare (unit http-transport)) (declare (uses common)) (declare (uses db)) (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses server)) (include "common_records.scm") (include "db_records.scm") (define (http-transport:make-server-url hostport) (if (not hostport) #f | > | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | (declare (unit http-transport)) (declare (uses common)) (declare (uses db)) (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses server)) (declare (uses daemon)) (include "common_records.scm") (include "db_records.scm") (define (http-transport:make-server-url hostport) (if (not hostport) #f |
︙ | ︙ | |||
116 117 118 119 120 121 122 | (define (http-transport:try-start-server ipaddrstr portnum) (handle-exceptions exn (begin (print-error-message exn) (if (< portnum 9000) (begin | | | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | (define (http-transport:try-start-server ipaddrstr portnum) (handle-exceptions exn (begin (print-error-message exn) (if (< portnum 9000) (begin (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; (open-run-close tasks:remove-server-records tasks:open-db) (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum) (http-transport:try-start-server ipaddrstr (+ portnum 1))) (print "ERROR: Tried and tried but could not start the server"))) ;; any error in following steps will result in a retry (set! *runremote* (list ipaddrstr portnum)) |
︙ | ︙ | |||
215 216 217 218 219 220 221 | (begin (sleep 4) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (tdb (tasks:open-db)) | > | | | > | > | | 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | (begin (sleep 4) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (tdb (tasks:open-db)) (spid ;;(open-run-close tasks:server-get-server-id tasks:open-db #f iface port #f)) (tasks:server-get-server-id tdb #f iface port #f)) (server-timeout (let ((tmo (config-lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; default to three days (* 3 24 60 60))))) (debug:print-info 2 "server-timeout: " server-timeout ", server pid: " spid " on " iface ":" port) (let loop ((count 0)) (thread-sleep! 4) ;; no need to do this very often ;; NB// sync currently does NOT return queue-length (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *runremote*) (mutex-unlock! *heartbeat-mutex*) (if (or (not (equal? sdat (list iface port))) (not spid)) (begin (debug:print-info 0 "interface changed, refreshing iface and port info") (set! iface (car sdat)) (set! port (cadr sdat)) (set! spid (tasks:server-get-server-id tdb #f iface port #f)))) ;; NOTE: Get rid of this mechanism! It really is not needed... ;; (open-run-close tasks:server-update-heartbeat tasks:open-db spid) (tasks:server-update-heartbeat tdb spid) ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout) (if (> (+ last-access server-timeout) (current-seconds)) (begin (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name)) (thread-sleep! 1) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Number of cached writes " *number-of-writes*) (debug:print-info 0 "Average cached write time " (if (eq? *number-of-writes* 0) "n/a (no writes)" (/ *writes-total-delay* |
︙ | ︙ | |||
311 312 313 314 315 316 317 318 319 320 321 322 323 324 | (thread-start! th3) (thread-start! th1) (set! *didsomething* #t) (thread-join! th2)) (debug:print 0 "ERROR: Failed to setup for megatest"))) (exit))) (define (http-transport:server-signal-handler signum) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () (thread-sleep! 1)) ;; (if (not *received-response*) | > > > > > > > > > > > > > > > > > | 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 | (thread-start! th3) (thread-start! th1) (set! *didsomething* #t) (thread-join! th2)) (debug:print 0 "ERROR: Failed to setup for megatest"))) (exit))) ;; (use trace) ;; (trace http-transport:keep-running ;; tasks:server-update-heartbeat ;; tasks:server-get-server-id) ;; tasks:get-best-server ;; http-transport:run ;; http-transport:launch ;; http-transport:try-start-server ;; http-transport:client-send-receive ;; http-transport:make-server-url ;; tasks:server-register ;; tasks:server-delete ;; start-server ;; hostname->ip ;; with-input-from-request ;; tasks:server-deregister-self) (define (http-transport:server-signal-handler signum) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () (thread-sleep! 1)) ;; (if (not *received-response*) |
︙ | ︙ |
Modified megatest-version.scm from [24437845eb] to [b0fa89eaee].
1 2 3 4 5 | ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) | | | 1 2 3 4 5 6 7 | ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) (define megatest-version 1.5416) |
Modified tasks.scm from [e04939ec8c] to [4adf87fded].
︙ | ︙ | |||
24 25 26 27 28 29 30 | (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (sqlite3:set-busy-handler! mdb handler) | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (sqlite3:set-busy-handler! mdb handler) (sqlite3:execute mdb (conc "PRAGMA synchronous = 1;")) (if (not exists) (begin (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, state TEXT DEFAULT 'new', target TEXT DEFAULT '', |
︙ | ︙ | |||
139 140 141 142 143 144 145 146 147 148 149 150 151 152 | (begin (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)") "SELECT id FROM servers WHERE pid=-999;"))) (if hostname hostname iface)(if pid pid port)) res)) (define (tasks:server-update-heartbeat mdb server-id) (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id)) ;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds (define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f)) (let* ((server-id (if server-id server-id (tasks:server-get-server-id mdb hostname iface port pid))) | > | 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | (begin (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)") "SELECT id FROM servers WHERE pid=-999;"))) (if hostname hostname iface)(if pid pid port)) res)) (define (tasks:server-update-heartbeat mdb server-id) (debug:print-info 0 "Heart beat update of server id=" server-id) (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id)) ;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds (define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f)) (let* ((server-id (if server-id server-id (tasks:server-get-server-id mdb hostname iface port pid))) |
︙ | ︙ |