Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -18,12 +18,17 @@ ;;====================================================================== (declare (unit rmtmod)) (declare (uses debugprint)) +;; (declare (uses debugprint.import)) (declare (uses commonmod)) +;; (declare (uses commonmod.import)) (declare (uses dbfile)) ;; needed for records +(declare (uses dbmod)) +;; (declare (uses tcp-transportmod)) +;; (declare (uses tcp-transportmod.import)) ;; (declare (uses apimod)) ;; (declare (uses apimod.import)) ;; (declare (uses ulex)) @@ -33,10 +38,14 @@ * (import scheme chicken data-structures extras matchable srfi-69) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod dbfile debugprint) ;; (prefix commonmod cmod:)) +(import dbmod + ;; tcp-transportmod + ) + ;; (import apimod) ;; (import (prefix ulex ulex:)) (include "db_records.scm") @@ -305,7 +314,25 @@ ;; call end of eud of run detection for posthook - from merge, is it needed? ;; (launch:end-of-run-check run-id) all-ids) ))))) +;;====================================================================== +;; Misc +;;====================================================================== + +;; (define (rmtmod:wait-on-server-load run-id ttdat) +;; (let* ((dbfname (dbmod:run-id->dbfname run-id)) +;; (get-lowest-thread-load +;; (lambda () +;; (let* ((sdats (tt:get-server-info-sorted ttdat dbfname))) +;; (car (map tt:get-server-threads sdats)))))) +;; (if ttdat +;; (let loop () +;; (if (> (get-lowest-thread-load) 5) ;; load is pretty high +;; (begin +;; (debug:print 0 *default-log-port* "Servers appear overloaded, waiting...") +;; (thread-sleep! 1) +;; (loop)))) +;; (debug:print 0 *default-log-port* "Can't wait on server load, *ttdat* not set")))) ) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -29,10 +29,11 @@ (declare (uses mt)) (declare (uses archive)) (declare (uses mtargs)) (declare (uses rmtmod)) (declare (uses dbfile)) +(declare (uses tcp-transportmod)) (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format sxml-serializer sxml-modifications matchable) @@ -48,10 +49,11 @@ (import commonmod debugprint rmtmod dbfile + tcp-transportmod (prefix mtargs args:)) ;; use this struct to facilitate refactoring ;; @@ -1191,13 +1193,17 @@ (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f)) ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues (if maxhomehostload (common:wait-for-homehost-load maxhomehostload - (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))))) + (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) + + ;; lastly lets check the servers are not overloaded by looking at threads + (tt:wait-on-server-load run-id *ttdat*) + + ))) - (if (and (not (null? prereqs-not-met)) (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -285,35 +285,55 @@ ;; returns ( result . ping_time ) (define (tt:timed-ping host port server-id) (let* ((start-time (current-milliseconds)) (result (tt:ping host port server-id))) (cons result (- (current-milliseconds) start-time)))) - + +;; host:port => ( meta . when-updated) (define *server-load* (make-hash-table)) (define (tt:save-server-meta host port meta) - (hash-table-set! *server-load* (conc host":"port) meta)) + (hash-table-set! *server-load* (conc host":"port) (cons meta (current-seconds)))) (define (tt:get-server-threads dat) (let* ((host (car dat)) (port (cadr dat)) - (meta (tt:get-server-meta host port #t))) + (dat (tt:get-server-meta host port #t)) + (meta (car dat))) (if (list? meta) (alist-ref 'sload meta) #f))) - + ;; lazy get, does not auto-refresh meta, this might be a problem ;; (define (tt:get-server-meta host port #!optional (do-ping #f)) - (let* ((meta (hash-table-ref/default *server-load* (conc host":"port) #f))) + (let* ((get-meta (lambda () + (let* ((dat (hash-table-ref/default *server-load* (conc host":"port) #f))) + (if dat (car dat) #f)))) + (meta (get-meta))) (if (and (not meta) do-ping) (begin (tt:timed-ping host port #f) - (hash-table-ref/default *server-load* (conc host":"port) #f)) + (get-meta)) meta))) +(define (tt:wait-on-server-load run-id ttdat) + (let* ((dbfname (dbmod:run-id->dbfname run-id)) + (get-lowest-thread-load + (lambda () + (let* ((sdats (tt:get-server-info-sorted ttdat dbfname))) + (car (map tt:get-server-threads sdats)))))) + (if ttdat + (let loop () + (if (> (get-lowest-thread-load) 5) ;; load is pretty high + (begin + (debug:print 0 *default-log-port* "Servers appear overloaded, waiting...") + (thread-sleep! 1) + (loop)))) + (debug:print 0 *default-log-port* "Can't wait on server load, *ttdat* not set")))) + (define (tt:ping host port server-id #!optional (tries-left 5)) (let* ((res (tt:send-receive-direct host port `(ping #f #f #f) ping-mode: #t)) ;; please send me your server-id (try-again (lambda () (if (> tries-left 0) (begin