1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
|
(common:get-num-cpus remote-host)
numcpus-in))
(maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
(loadjmp (- first next))
(adjwait (+ (/ (- 1000 count) 10) waitdelay)))
(cond
((and (> first adjload)
(> count 0))
(debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
(thread-sleep! adjwait)
(common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
((and (> loadjmp numcpus)
|
|
|
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
|
(common:get-num-cpus remote-host)
numcpus-in))
(maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
(loadjmp (- first next))
(adjwait (+ (random 10)(/ (- 1000 count) 10) waitdelay))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
(cond
((and (> first adjload)
(> count 0))
(debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
(thread-sleep! adjwait)
(common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
((and (> loadjmp numcpus)
|