Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -310,10 +310,11 @@ (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) (server-info (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive + (connect-time (current-seconds)) (conndat #f) (transport *transport-type*) (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -64,11 +64,11 @@ (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) - + (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) (cond ((> attemptnum 2) (thread-sleep! 0.05)) @@ -120,10 +120,17 @@ (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond + ((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds + (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") + (set! *runremote* #f) + ;; BUG: close-connections should go here? + (mutex-unlock! *rmt-mutex*) + (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) + ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")