Overview
Comment: | Getting close on gating runs from starting new tests when server load is high. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.81-multi-server |
Files: | files | file ages | folders |
SHA1: |
6a90d15b55723616e5cd53c9083c7cbc |
User & Date: | matt on 2024-07-08 03:00:57 |
Other Links: | branch diff | manifest | tags |
Context
2024-07-08
| ||
06:01 | wip (still broke) check-in: 00c25a6b53 user: matt tags: v1.81-multi-server | |
03:00 | Getting close on gating runs from starting new tests when server load is high. check-in: 6a90d15b55 user: matt tags: v1.81-multi-server | |
2024-07-07
| ||
20:09 | Sort servers based on number of threads running to estimate load check-in: af60709165 user: matt tags: v1.81-multi-server | |
Changes
Modified rmtmod.scm from [bb5d679cbc] to [1cfe9c07c7].
︙ | |||
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | + + + + + + + + + | ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;;====================================================================== (declare (unit rmtmod)) (declare (uses debugprint)) ;; (declare (uses debugprint.import)) (declare (uses commonmod)) ;; (declare (uses commonmod.import)) (declare (uses dbfile)) ;; needed for records (declare (uses dbmod)) ;; (declare (uses tcp-transportmod)) ;; (declare (uses tcp-transportmod.import)) ;; (declare (uses apimod)) ;; (declare (uses apimod.import)) ;; (declare (uses ulex)) ;; (include "ulex/ulex.scm") (module rmtmod * (import scheme chicken data-structures extras matchable srfi-69) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod dbfile debugprint) ;; (prefix commonmod cmod:)) (import dbmod ;; tcp-transportmod ) ;; (import apimod) ;; (import (prefix ulex ulex:)) (include "db_records.scm") (defstruct alldat (areapath #f) |
︙ | |||
303 304 305 306 307 308 309 310 | 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | + + + + + + + + + + + + + + + + + - + + | run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))))))) ;; call end of eud of run detection for posthook - from merge, is it needed? ;; (launch:end-of-run-check run-id) all-ids) ))))) ;;====================================================================== ;; Misc ;;====================================================================== ;; (define (rmtmod:wait-on-server-load run-id ttdat) ;; (let* ((dbfname (dbmod:run-id->dbfname run-id)) ;; (get-lowest-thread-load ;; (lambda () ;; (let* ((sdats (tt:get-server-info-sorted ttdat dbfname))) ;; (car (map tt:get-server-threads sdats)))))) ;; (if ttdat ;; (let loop () ;; (if (> (get-lowest-thread-load) 5) ;; load is pretty high ;; (begin ;; (debug:print 0 *default-log-port* "Servers appear overloaded, waiting...") ;; (thread-sleep! 1) ;; (loop)))) ;; (debug:print 0 *default-log-port* "Can't wait on server load, *ttdat* not set")))) |
Modified runs.scm from [c4364e3870] to [0cd899f860].
︙ | |||
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | + + | (declare (uses tests)) (declare (uses server)) (declare (uses mt)) (declare (uses archive)) (declare (uses mtargs)) (declare (uses rmtmod)) (declare (uses dbfile)) (declare (uses tcp-transportmod)) (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format sxml-serializer sxml-modifications matchable) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") ;; (include "debugger.scm") (import commonmod debugprint rmtmod dbfile tcp-transportmod (prefix mtargs args:)) ;; use this struct to facilitate refactoring ;; (defstruct runs:dat reglen regfull |
︙ | |||
1189 1190 1191 1192 1193 1194 1195 | 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 | - - - + + + + + + + | (if (and (not (common:on-homehost?)) maxload) ;; only gate if maxload is specified, NOTE: maxload is normalized, i.e. load=1 means all cpus fully utilized (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f)) ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues (if maxhomehostload (common:wait-for-homehost-load maxhomehostload |
︙ |
Modified tcp-transportmod.scm from [a6f9fa170f] to [494ffa0754].
︙ | |||
283 284 285 286 287 288 289 | 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 | - + + - + - + + - + + - + + + - + + + + + + + + + + + + + + + + | (tt:client-connect-to-server ttdat dbfname run-id testsuite server-start-proc))))))) ;; returns ( result . ping_time ) (define (tt:timed-ping host port server-id) (let* ((start-time (current-milliseconds)) (result (tt:ping host port server-id))) (cons result (- (current-milliseconds) start-time)))) |
︙ |