︙ | | | ︙ | |
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
;;======================================================================
(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
format dot-locking csv-xml z3 udp ;; sql-de-lite
hostinfo md5 message-digest typed-records directory-utils stack
matchable regex posix (srfi 18) extras ;; tcp
(prefix nanomsg nmsg:)
(prefix sqlite3 sqlite3:)
pkts (prefix dbi dbi:)
)
(declare (unit common))
(declare (uses commonmod))
(import commonmod)
(include "common_records.scm")
;; (require-library margs)
;; (include "margs.scm")
|
|
|
>
>
|
|
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
;;======================================================================
(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
format dot-locking csv-xml z3 udp ;; sql-de-lite
hostinfo md5 message-digest typed-records directory-utils stack
matchable regex posix (srfi 18) extras ;; tcp
;; (prefix nanomsg nmsg:)
(prefix sqlite3 sqlite3:)
(prefix dbi dbi:)
)
(declare (unit common))
(declare (uses commonmod))
(import (prefix commonmod cmod:))
(import pkts)
(include "common_records.scm")
;; (require-library margs)
;; (include "margs.scm")
|
︙ | | | ︙ | |
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
|
(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
(if (not (directory-exists? "logs"))(create-directory "logs"))
(directory-fold
(lambda (file rem)
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "unable to rotate log " file ", probably handled by another process.")
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(print-call-chain (current-error-port)))
(let* ((fullname (conc "logs/" file))
(mod-time (file-modification-time fullname))
(file-age (- (current-seconds) mod-time)))
(hash-table-set! all-files file mod-time)
(if (or (and (string-match "^.*.log" file)
(> (file-size fullname) 200000))
(and (string-match "^server-.*.log" file)
|
|
|
|
>
|
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
|
(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
(if (not (directory-exists? "logs"))(create-directory "logs"))
(directory-fold
(lambda (file rem)
(handle-exceptions
exn
(begin
(debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore.")
(debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
;; (print-call-chain (current-error-port)) ;;
)
(let* ((fullname (conc "logs/" file))
(mod-time (file-modification-time fullname))
(file-age (- (current-seconds) mod-time)))
(hash-table-set! all-files file mod-time)
(if (or (and (string-match "^.*.log" file)
(> (file-size fullname) 200000))
(and (string-match "^server-.*.log" file)
|
︙ | | | ︙ | |
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
|
(if val val default)))
(define (assoc/default key lst . default)
(let ((res (assoc key lst)))
(if res (cadr res)(if (null? default) #f (car default)))))
(define (common:get-testsuite-name)
(or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description.
(configf:lookup *configdat* "setup" "testsuite" )
(getenv "MT_TESTSUITE_NAME")
(if (string? *toppath* )
(pathname-file *toppath*)
#f))) ;; (pathname-file (current-directory)))))
;; safe getting of toppath
(define (common:get-toppath areapath)
(or *toppath*
(if areapath
(begin
(set! *toppath* areapath)
|
<
<
<
|
<
<
|
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
|
(if val val default)))
(define (assoc/default key lst . default)
(let ((res (assoc key lst)))
(if res (cadr res)(if (null? default) #f (car default)))))
(define (common:get-testsuite-name)
(cmod:get-testsuite-name *toppath* *configdat*))
;; safe getting of toppath
(define (common:get-toppath areapath)
(or *toppath*
(if areapath
(begin
(set! *toppath* areapath)
|
︙ | | | ︙ | |
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
|
;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
(define (common:bash-glob instr)
(string-split
(with-input-from-pipe
(conc "/bin/bash -c \"echo " instr "\"")
read-line)))
;;======================================================================
;; T A R G E T S , S T A T E , S T A T U S ,
;; R U N N A M E A N D T E S T P A T T
;;======================================================================
;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t))))
|
|
>
>
>
>
>
>
>
>
>
>
>
>
|
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
|
;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
(define (common:bash-glob instr)
(string-split
(with-input-from-pipe
(conc "/bin/bash -c \"echo " instr "\"")
read-line)))
;;======================================================================
;; Some safety net stuff
;;======================================================================
;; return input if it is a list or return null
(define (common:list-or-null inlst #!key (ovrd #f)(message #f))
(if (list? inlst)
inlst
(begin
(if message (debug:print-error 0 *default-log-port* message))
(or ovrd '()))))
;;======================================================================
;; T A R G E T S , S T A T E , S T A T U S ,
;; R U N N A M E A N D T E S T P A T T
;;======================================================================
;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t))))
|
︙ | | | ︙ | |
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
|
(if (> curr-rownum rownum) curr-rownum rownum)
(if (> curr-colnum colnum) curr-colnum colnum)
))))))
;; if it looks like a number -> convert it to a number, else return it
;;
(define (common:lazy-convert inval)
(let* ((as-num (if (string? inval)(string->number inval) #f)))
(or as-num inval)))
;; convert string a=1; b=2; c=a silly thing; d=
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (common:val->alist val #!key (convert #f))
(let ((val-list (string-split-fields ";\\s*" val #:infix)))
(if val-list
(map (lambda (x)
(let ((f (string-split-fields "\\s*=\\s*" x #:infix)))
(case (length f)
((0) `(,#f)) ;; null string case
((1) `(,(string->symbol (car f))))
((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f)))
(if convert (common:lazy-convert inval) inval))))
(else f))))
val-list)
'())))
;;======================================================================
;; S Y S T E M S T U F F
;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
|
<
|
<
<
<
<
<
<
<
<
|
<
<
<
|
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
|
(if (> curr-rownum rownum) curr-rownum rownum)
(if (> curr-colnum colnum) curr-colnum colnum)
))))))
;; if it looks like a number -> convert it to a number, else return it
;;
(define (common:lazy-convert inval)
(cmod:lazy-convert inval))
;; convert string a=1; b=2; c=a silly thing; d=
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (common:val->alist val #!key (convert #f))
(cmod:val->alist val #!key (convert #f)))
;;======================================================================
;; S Y S T E M S T U F F
;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
|
︙ | | | ︙ | |
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
|
;; (set! cpu-load newval))))))
;; (car load-res))
;; cpu-load))
;; get values from cached info from dropping file in logs dir
;; e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 5))
(if *toppath*
(let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")))
(if (and (file-exists? fullpath)
(file-read-access? fullpath))
(handle-exceptions
exn
#f
(debug:print 2 *default-log-port* "reading file " fullpath)
(let ((real-age (- (current-seconds)(file-change-time fullpath))))
(if (< real-age age)
(with-input-from-file fullpath read)
(begin
(debug:print 1 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it")
#f))))
(begin
(debug:print 2 *default-log-port* "not reading file " fullpath)
#f)))
#f))
(define (common:write-cached-info key dtype dat)
(if *toppath*
(let* ((fulldir (conc *toppath* "/.sysdata"))
(fullpath (conc fulldir "/" key "-" dtype ".log")))
(if (not (file-exists? fulldir))(create-directory fulldir #t))
(handle-exceptions
exn
#f
(with-output-to-file fullpath (lambda ()(pp dat)))))
#f))
;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
(handle-exceptions
exn
'(99 99 99)
(let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
|
|
|
|
>
>
>
>
>
>
>
>
|
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
|
;; (set! cpu-load newval))))))
;; (car load-res))
;; cpu-load))
;; get values from cached info from dropping file in logs dir
;; e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 10))
(if *toppath*
(let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")))
(if (and (file-exists? fullpath)
(file-read-access? fullpath))
(handle-exceptions
exn
#f
(debug:print 2 *default-log-port* "reading file " fullpath)
(let ((real-age (- (current-seconds)(file-change-time fullpath))))
(if (< real-age age)
(with-input-from-file fullpath read)
(begin
(debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it")
#f))))
(begin
(debug:print 2 *default-log-port* "not reading file " fullpath)
#f)))
#f))
(define (common:write-cached-info key dtype dat)
(if *toppath*
(let* ((fulldir (conc *toppath* "/.sysdata"))
(fullpath (conc fulldir "/" key "-" dtype ".log")))
(if (not (file-exists? fulldir))(create-directory fulldir #t))
(handle-exceptions
exn
#f
(with-output-to-file fullpath (lambda ()(pp dat)))))
#f))
(define (common:raw-get-remote-host-load remote-host)
(handle-exceptions
exn
#f ;; more specific handling of errors needed
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/loadavg")
(lambda ()(list (read)(read)(read))))))
;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
(handle-exceptions
exn
'(99 99 99)
(let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
|
︙ | | | ︙ | |
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
|
numcpu)
(read-line))))))
(result (if remote-host
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/cpuinfo")
proc)
(with-input-from-file "/proc/cpuinfo" proc))))
(if (> result 0)(common:write-cached-info actual-host "num-cpus" result))
result))))
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload msg remote-host #!optional (rem-tries 5))
(let ((num-cpus (common:get-num-cpus remote-host)))
(if num-cpus
|
>
>
|
|
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
|
numcpu)
(read-line))))))
(result (if remote-host
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/cpuinfo")
proc)
(with-input-from-file "/proc/cpuinfo" proc))))
(if (and (number? result)
(> result 0))
(common:write-cached-info actual-host "num-cpus" result))
result))))
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload msg remote-host #!optional (rem-tries 5))
(let ((num-cpus (common:get-num-cpus remote-host)))
(if num-cpus
|
︙ | | | ︙ | |
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
|
maxload-in
(max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
(loadjmp (- first next))
(adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) )) )));; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
(cond
((and (> first adjload)
(> count 0))
(debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
(thread-sleep! adjwait)
(common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
((and (> loadjmp numcpus)
|
>
>
|
|
|
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
|
maxload-in
(max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
(loadjmp (- first next))
(adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) )) )));; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
;; let's let the user know once in a long while that load checking is happening but not constantly report it
(if (> (random 100) 75) ;; about 25% of the time
(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
", load: " first ", adjload: " adjload ", loadjmp: " loadjmp))
(cond
((and (> first adjload)
(> count 0))
(debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
(thread-sleep! adjwait)
(common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
((and (> loadjmp numcpus)
|
︙ | | | ︙ | |