Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -157,11 +157,11 @@ (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) - ((> *api-process-request-count* 20) ;; 20) + ((> *api-process-request-count* 200) ;; 20) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") (set! *server-overloaded* #t) (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! (else (let* ((cmd-in (vector-ref dat 0)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1061,14 +1061,14 @@ (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync") (let* ((lockdat (db:no-sync-get-lock no-sync-db from-db-file)) (gotlock (car lockdat)) (locktime (cdr lockdat))) - (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: go lock?") + (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?") (if gotlock (begin - (debug:print 0 *default-log-port* "db:lock-and-delta-sync copying db") + (debug:print 0 *default-log-port* "db:lock-and-delta-sync copying db "runid" at "(current-seconds)) (db:sync-touched dbstruct runid) (db:no-sync-del! no-sync-db from-db-file) #t) (begin (debug:print 0 *default-log-port* "could not get lock for " from-db-file " from no-sync-db") Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -475,11 +475,12 @@ (set! server-going #t) (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. ;; (thread-start! *watchdog*) ) - (if no-sync-db + (if (and no-sync-db + (common:low-noise-print 5 "sync-all")) ;; cheesy way to reduce frequency of running sync :) (begin (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")) (db:all-db-sync *dbstruct-dbs*) ;; (db:do-sync no-sync-db) ;; (db:run-lock-and-sync *no-sync-db*) @@ -530,10 +531,22 @@ (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond + ((and *server-run* + (> (- (current-seconds) server-start-time) 120)) ;; let's try server replacement + ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1)) + (let* ((loaddat (common:get-normalized-cpu-load #f)) + (adj-proc-load (alist-ref 'adj-proc-load loaddat)) + (adj-core-load (alist-ref 'adj-core-load loaddat)) + (adj-load (max adj-proc-load adj-core-load))) + (if (< adj-load 2) ;; reduce chance of runaway + (server:run *toppath*)) + (db:all-db-sync *dbstruct-dbs*) + (thread-sleep! 30) + (http-transport:server-shutdown port))) ((and *server-run* (> (+ last-access server-timeout) (current-seconds))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))