Changes In Branch v1.64-load-mgmt Excluding Merge-Ins
This is equivalent to a diff from 03e6b6ae88 to 51cc0dc1c7
2017-06-21
| ||
22:37 | Added emergency patch for the mt:set-state-status issue check-in: 99a52da73d user: matt tags: v1.64 | |
16:50 | Work in progress for better load management. Closed-Leaf check-in: 51cc0dc1c7 user: mrwellan tags: v1.64-load-mgmt | |
12:30 | Merged recent changes from v1.64 into v1.65 check-in: b960ebe7d0 user: mrwellan tags: v1.65, v1.6502 | |
12:26 | Added ability to modify or extend the blacklisted environment variables and updated manual check-in: 03e6b6ae88 user: mrwellan tags: v1.64 | |
2017-06-20
| ||
16:20 | forgot the version again check-in: 39bde846fc user: bjbarcla tags: v1.64, v1.6421 | |
Modified runs.scm from [ddf4fcce25] to [428573321e].
︙ | ︙ | |||
944 945 946 947 948 949 950 | (not (member 'exclusive testmode))))) ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing | > | > > | > > > > > > > > > > > > | 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 | (not (member 'exclusive testmode))))) ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing (let ((hh (common:get-homehost)) (maxload (configf:lookup *configdat* "jobtools" "maxload"))) (if maxload ;; only gate if maxload is specified (let loadloop ((load-dat #f)) (common:wait-for-cpuload maxload numcpus waitdelay) ;; first wait for local load to decrease if it happens to be high (if (and hh (not (common:on-homehost?))) (let* ((hh-load-dat (common:get-normalized-cpu-load hh)) (hh-load (if hh-load-dat (alist-ref 'adj-cpu-load hh-load-dat) #f))) (cond ((not hh-load)(debug:print-info 0 *default-log-port* "Could not determine load on homehost. Proceeding as if load is fine ...")) ((> hh-load maxload) (debug:print-info 0 *default-log-port "Load too high on homehost, delaying before launching tests.") (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! 
test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (or (not (null? tal))(not (null? reg))) (list (runs:queue-next-hed tal reg reglen regfull) |
︙ | ︙ |
Modified server.scm from [afd86af346] to [04cdd430fa].
︙ | ︙ | |||
455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 | ;; (define (server:writable-watchdog dbstruct) (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (let* (;;(dbstruct (db:setup)) (mtdb (dbr:dbstruct-mtdb dbstruct)) (mtpath (db:dbdat-get-path mtdb))) (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) | > > | | > > > > | > > | 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 | ;; (define (server:writable-watchdog dbstruct) (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) (sync-delay (string->number (or (configf:get *configdat* "server" "sync-delay") "5"))) (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. 
legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (let* (;;(dbstruct (db:setup)) (mtdb (dbr:dbstruct-mtdb dbstruct)) (mtpath (db:dbdat-get-path mtdb))) (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) (let* ((load-too-high (> (common:get-normalized-cpu-load #f) 1)) (need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write (sync-in-progress *db-sync-in-progress*) (should-sync (and (not *time-to-exit*) (> (- (current-seconds) *db-last-sync*) (or (if load-too-high ;; if load is high increase delay to bigger of sync-delay or 15 sec (max sync-delay 15) #f) sync-delay)))) ;; sync every five seconds minimum (start-time (current-seconds)) (mt-mod-time (file-modification-time mtpath)) (recently-synced (< (- start-time mt-mod-time) 4)) (will-sync (and (if load-too-high should-sync (or need-sync should-sync)) (not sync-in-progress) (not recently-synced)))) (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync) ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) |
︙ | ︙ |