541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
|
(if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
(file-exists? fullname)) ;; just in case it was gzipped - will get it next time
(handle-exceptions
exn
#f
(if (directory? fullname)
(begin
(debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
(inc-stat "directories"))
(begin
(delete-file* fullname)
(inc-stat "deleted")))
(hash-table-delete! all-files file)))))))
'()
"logs")
|
|
|
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
|
(if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
(file-exists? fullname)) ;; just in case it was gzipped - will get it next time
(handle-exceptions
exn
#f
(if (directory? fullname)
(begin
(debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
(inc-stat "directories"))
(begin
(delete-file* fullname)
(inc-stat "deleted")))
(hash-table-delete! all-files file)))))))
'()
"logs")
|
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
|
(lambda (a b)
(< (hash-table-ref all-files a)(hash-table-ref all-files b))))
(- num-logs max-allowed))))
(for-each
(lambda (file)
(let* ((fullname (conc "logs/" file)))
(if (directory? fullname)
(debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
(handle-exceptions
exn
(debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
(delete-file* fullname)))))
files)
(debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
|
|
|
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
|
(lambda (a b)
(< (hash-table-ref all-files a)(hash-table-ref all-files b))))
(- num-logs max-allowed))))
(for-each
(lambda (file)
(let* ((fullname (conc "logs/" file)))
(if (directory? fullname)
(debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
(handle-exceptions
exn
(debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
(delete-file* fullname)))))
files)
(debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
|
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
|
;; calculate a delay number based on a droop curve
;; inputs are:
;; - load-in, load as from uptime, NOT normalized
;; - numcpus, number of cpus, ideally use the real cpus, not threads
;;
(define (common:get-delay load-in numcpus)
(let* ((ratio (/ load-in numcpus))
(new-option (configf:lookup *configdat* "load" "new-load-method")))
(if new-option
(begin
(cond ((and (>= ratio 0) (< ratio .5))
0
)
((and (>= ratio 0.5) (<= ratio .9))
(* ratio (/ 5 .9)))
((and (> ratio .9) (<= ratio 1.1))
(+ 5 (* (- ratio .9) (/ 55 .2))))
((> ratio 1.1)
60)))
(max (/ (expt 5 (* 4 ratio)) 10) 0))))
(define (get-cpu-load #!key (remote-host #f))
(car (common:get-cpu-load remote-host)))
;; (let* ((load-res (process:cmd-run->list "uptime"))
;; (load-rx (regexp "load average:\\s+(\\d+)"))
;; (cpu-load #f))
;; (for-each (lambda (l)
;; (let ((match (string-search load-rx l)))
;; (if match
;; (let ((newval (string->number (cadr match))))
;; (if (number? newval)
;; (set! cpu-load newval))))))
;; (car load-res))
;; cpu-load))
;; get values from cached info from dropping file in logs dir
;; e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 10))
(if *toppath*
(let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")))
(if (and (file-exists? fullpath)
(file-read-access? fullpath))
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn)
#f)
(debug:print 2 *default-log-port* "reading file " fullpath)
(let ((real-age (- (current-seconds)(file-change-time fullpath))))
(if (< real-age age)
(handle-exceptions
exn
(begin
(debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn)
(delete-file* fullpath)
#f)
(with-input-from-file fullpath read))
(begin
(debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it")
#f))))
(begin
(debug:print 2 *default-log-port* "not reading file " fullpath)
#f)))
#f))
(define (common:write-cached-info key dtype dat)
|
|
>
>
>
|
|
|
<
|
|
|
|
|
|
>
>
>
|
>
>
>
>
>
>
>
>
|
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
|
<
|
|
|
|
>
|
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
|
;; calculate a delay number based on a droop curve
;; inputs are:
;; - load-in, load as from uptime, NOT normalized
;; - numcpus, number of cpus, ideally use the real cpus, not threads
;;
(define (common:get-delay load-in numcpus)
(let* ((ratio (/ load-in numcpus))
(new-option (configf:lookup *configdat* "load" "new-load-method"))
(paramstr (or (configf:lookup *configdat* "load" "exp-params")
"15 12 1281453987.9543 0.75")) ;; 5 4 10 1"))
(paramlst (map string->number (string-split paramstr))))
(if new-option
(begin
(cond ((and (>= ratio 0) (< ratio .5))
0)
((and (>= ratio 0.5) (<= ratio .9))
(* ratio (/ 5 .9)))
((and (> ratio .9) (<= ratio 1.1))
(+ 5 (* (- ratio .9) (/ 55 .2))))
((> ratio 1.1)
60)))
(match paramlst
((r1 r2 s1 s2)
(debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2)
(min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30))
(else
(debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr)
30)))))
(define (common:print-delay-table)
(let loop ((x 0))
(print x "," (common:get-delay x 1))
(if (< x 2)
(loop (+ x 0.1)))))
(define (get-cpu-load #!key (remote-host #f))
(car (common:get-cpu-load remote-host)))
;; (let* ((load-res (process:cmd-run->list "uptime"))
;; (load-rx (regexp "load average:\\s+(\\d+)"))
;; (cpu-load #f))
;; (for-each (lambda (l)
;; (let ((match (string-search load-rx l)))
;; (if match
;; (let ((newval (string->number (cadr match))))
;; (if (number? newval)
;; (set! cpu-load newval))))))
;; (car load-res))
;; cpu-load))
;; get values from cached info from dropping file in logs dir
;; e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 10))
(if *toppath*
(let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))
(delfile (lambda ()
(debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn)
(delete-file* fullpath)
#f)))
(if (and (file-exists? fullpath)
(file-read-access? fullpath))
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn)
#f)
(debug:print 2 *default-log-port* "reading file " fullpath)
(let ((real-age (- (current-seconds)
(handle-exceptions
exn
(begin
(debug:print 1 *default-log-port* "Failed to read mod time on file "
fullpath ", using 0, exn=" exn)
0)
(file-change-time fullpath)))))
(if (< real-age age)
(handle-exceptions
exn
(delfile)
(let* ((res (with-input-from-file fullpath read)))
(if (eof-object? res)
(begin
(delfile)
#f)
res)))
(begin
(debug:print-info 2 *default-log-port* "file " fullpath
" is too old (" real-age" seconds) to trust, skipping reading it")
#f))))
(begin
(debug:print 2 *default-log-port* "not reading file " fullpath)
#f)))
#f))
(define (common:write-cached-info key dtype dat)
|
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
|
(define (common:wait-for-homehost-load maxnormload msg)
(let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
#f
(common:get-homehost)))
(hh (if hh-dat (car hh-dat) #f)))
(common:wait-for-normalized-load maxnormload msg hh)))
(define (common:get-num-cpus remote-host)
(let* ((actual-host (or remote-host (get-host-name))))
(or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) ;; hosts had better not be changing the number of cpus too often!
(let* ((proc (lambda ()
(let loop ((numcpu 0)
(inl (read-line)))
(if (eof-object? inl)
(if (> numcpu 0)
numcpu
#f) ;; if zero return #f so caller knows that things are not working
(loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
(+ numcpu 1)
numcpu)
(read-line))))))
(result (if remote-host
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/cpuinfo")
proc)
(with-input-from-file "/proc/cpuinfo" proc))))
(if (and (number? result)
(> result 0))
(common:write-cached-info actual-host "num-cpus" result))
result))))
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
(let ((num-cpus (common:get-num-cpus remote-host)))
(if num-cpus
(common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host)
|
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
|
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
|
(define (common:wait-for-homehost-load maxnormload msg)
(let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
#f
(common:get-homehost)))
(hh (if hh-dat (car hh-dat) #f)))
(common:wait-for-normalized-load maxnormload msg hh)))
(define *numcpus-cache* (make-hash-table))
(define (common:get-num-cpus remote-host)
(let* ((actual-host (or remote-host (get-host-name))))
;; hosts had better not be changing the number of cpus too often!
(or (hash-table-ref/default *numcpus-cache* actual-host #f)
(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
(let* ((proc (lambda ()
(let loop ((numcpu 0)
(inl (read-line)))
(if (eof-object? inl)
(if (> numcpu 0)
numcpu
#f) ;; if zero return #f so caller knows that things are not working
(loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
(+ numcpu 1)
numcpu)
(read-line))))))
(result (if remote-host
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/cpuinfo")
proc)
(with-input-from-file "/proc/cpuinfo" proc))))
(if (and (number? result)
(> result 0))
(common:write-cached-info actual-host "num-cpus" result))
result))))
(hash-table-set! *numcpus-cache* actual-host numcpus)
numcpus))))
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
(let ((num-cpus (common:get-num-cpus remote-host)))
(if num-cpus
(common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host)
|
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
|
(effective-load (common:get-intercept first next))
(recommended-delay (common:get-delay effective-load numcpus))
(effective-host (or remote-host "localhost"))
(normalized-effective-load (/ effective-load numcpus))
(will-wait (> normalized-effective-load maxnormload)))
(if (> recommended-delay 0)
(let* ((actual-delay (min recommended-delay 30)))
(debug:print-info 0 *default-log-port* "Load control, delaying " actual-delay " seconds to maintain safe load.")
(thread-sleep! actual-delay)))
(cond
;; bad data, try again to get the data
((not will-wait)
(if (common:low-noise-print 30 (conc (round normalized-effective-load) "-load-acceptable-" effective-host))
(debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing.")))
|
>
|
>
>
|
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
|
(effective-load (common:get-intercept first next))
(recommended-delay (common:get-delay effective-load numcpus))
(effective-host (or remote-host "localhost"))
(normalized-effective-load (/ effective-load numcpus))
(will-wait (> normalized-effective-load maxnormload)))
(if (> recommended-delay 0)
(let* ((actual-delay (min recommended-delay 30)))
(if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load"))
(debug:print-info 0 *default-log-port* "Load control, delaying "
actual-delay " seconds to maintain safe load. current normalized effective load is "
normalized-effective-load"."))
(thread-sleep! actual-delay)))
(cond
;; bad data, try again to get the data
((not will-wait)
(if (common:low-noise-print 30 (conc (round normalized-effective-load) "-load-acceptable-" effective-host))
(debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing.")))
|