Overview
Comment: | load throttling for homehost |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64 |
Files: | files | file ages | folders |
SHA1: |
7f8fc94635b054b4288eccdaf8fc2cb0 |
User & Date: | mrwellan on 2017-06-22 10:03:55 |
Other Links: | branch diff | manifest | tags |
Context
2017-06-22
| ||
10:23 | Added emergency patch to common:cpu* and runs:process-expanded-tests to throttle back when cpu load on homehost is too high check-in: 1a2b766602 user: mrwellan tags: v1.64 | |
10:03 | load throttling for homehost check-in: 7f8fc94635 user: mrwellan tags: v1.64 | |
2017-06-21
| ||
22:37 | Added emergency patch to the mt:set-state-status issue check-in: 99a52da73d user: matt tags: v1.64 | |
Changes
cgisetup/cgi-bin/models became a regular file with contents [39c07627cc].
cgisetup/cgi-bin/pages became a regular file with contents [e2b5ed002d].
Modified common.scm from [fc28390531] to [85a0309836].
︙ | ︙ | |||
335 336 337 338 339 340 341 | (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") (exit 1)) (read-only (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") (exit 1)) (else (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") | | | | | | 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 | (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") (exit 1)) (read-only (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") (exit 1)) (else (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") (exit 1))))))) ;; (begin ;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") ;; (exit 1)))) ;;====================================================================== ;; S P A R S E A R R A Y S ;;====================================================================== (define (make-sparse-array) (let ((a (make-sparse-vector))) |
︙ | ︙ | |||
1479 1480 1481 1482 1483 1484 1485 | ((< (+ load (/ (random 250) 1000)) ;; add a random factor to keep from getting in a rut (+ best-load (/ (random 250) 1000)) ) (set! best-load load) (set! best-host hostname))))) hosts) best-host)) | < < < | > > > > > > > > | | 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 | ((< (+ load (/ (random 250) 1000)) ;; add a random factor to keep from getting in a rut (+ best-load (/ (random 250) 1000)) ) (set! best-load load) (set! best-host hostname))))) hosts) best-host)) (define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) (let* ((loadavg (common:get-cpu-load remote-host)) (first (car loadavg)) (next (cadr loadavg)) (adjload (* maxload numcpus)) (loadjmp (- first next))) (cond ((and (> first adjload) (> count 0)) (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg "")) (thread-sleep! waitdelay) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))) ((and (> loadjmp numcpus) (> count 0)) (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) (thread-sleep! waitdelay) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))))) (define (common:wait-for-homehost-load maxload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f)) (numcpus (common:get-num-cpus hh))) (common:wait-for-normalized-load maxload msg: msg remote-host: hh))) (define (common:get-num-cpus remote-host) (let ((proc (lambda () (let loop ((numcpu 0) (inl (read-line))) (if (eof-object? inl) numcpu (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line))))))) (if remote-host (with-input-from-pipe (conc "ssh " remote-host " cat /proc/cpuinfo") proc) (with-input-from-file "/proc/cpuinfo" proc)))) ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f)) (let ((num-cpus (common:get-num-cpus remote-host))) (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) (define (get-uname . params) (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) (uname #f)) (if (null? (car uname-res)) "unknown" (caar uname-res)))) |
︙ | ︙ |
Modified runs.scm from [ddf4fcce25] to [a34127fa90].
︙ | ︙ | |||
839 840 841 842 843 844 845 | '()))) (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! (not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner (numcpus (common:get-num-cpus #f)) | | > | | 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 | '()))) (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! (not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner (numcpus (common:get-num-cpus #f)) (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) |
︙ | ︙ | |||
944 945 946 947 948 949 950 | (not (member 'exclusive testmode))))) ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing | | | > > > | 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 | (not (member 'exclusive testmode))))) ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing (if maxload ;; only gate if maxload is specified (common:wait-for-cpuload maxload numcpus waitdelay)) (if maxhomehostload (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (or (not (null? tal))(not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) |
︙ | ︙ |