129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
(define *keys* (make-hash-table)) ;; cache the keys here
(define *keyvals* (make-hash-table))
(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here
(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db
(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex* (make-mutex))
;; cache environment vars for each run here
(define *env-vars-by-run-id* (make-hash-table))
;; Testconfig and runconfig caches.
(define *testconfigs* (make-hash-table)) ;; test-name => testconfig
(define *runconfigs* (make-hash-table)) ;; target => runconfig
;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than
;; five seconds ago
(define *pre-reqs-met-cache* (make-hash-table))
;; cache of verbosity given string
;;
(define *verbosity-cache* (make-hash-table))
(define (common:clear-caches)
(set! *target* (make-hash-table))
(set! *keys* (make-hash-table))
(set! *keyvals* (make-hash-table))
(set! *toptest-paths* (make-hash-table))
(set! *test-paths* (make-hash-table))
|
|
>
>
>
>
>
>
>
>
>
|
|
|
|
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
(define *keys* (make-hash-table)) ;; cache the keys here
(define *keyvals* (make-hash-table))
(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here
(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db
(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex* (make-mutex))
;; launching and hosts
(defstruct host
(reachable #f)
(last-update 0)
(last-used 0)
(last-cpuload 1))
(define *host-loads* (make-hash-table))
;; cache environment vars for each run here
(define *env-vars-by-run-id* (make-hash-table))
;; Testconfig and runconfig caches.
(define *testconfigs* (make-hash-table)) ;; test-name => testconfig
(define *runconfigs* (make-hash-table)) ;; target => runconfig
;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than
;; five seconds ago
(define *pre-reqs-met-cache* (make-hash-table))
;; cache of verbosity given string
;;
(define *verbosity-cache* (make-hash-table))
(define (common:clear-caches)
(set! *target* (make-hash-table))
(set! *keys* (make-hash-table))
(set! *keyvals* (make-hash-table))
(set! *toptest-paths* (make-hash-table))
(set! *test-paths* (make-hash-table))
|
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
|
(map (lambda (res)
(if (eof-object? res) 9e99 res))
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/loadavg")
(lambda ()(list (read)(read)(read)))))
(with-input-from-file "/proc/loadavg"
(lambda ()(list (read)(read)(read))))))
(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
(let* ((loadavg (common:get-cpu-load remote-host))
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload numcpus))
(loadjmp (- first next)))
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
|
(map (lambda (res)
(if (eof-object? res) 9e99 res))
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/loadavg")
(lambda ()(list (read)(read)(read)))))
(with-input-from-file "/proc/loadavg"
(lambda ()(list (read)(read)(read))))))
;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns list (normalized-proc-load normalized-core-load 1m 5m 15m ncores nthreads)
;;
(define (common:get-normalized-cpu-load remote-host)
(let ((data (if remote-host
(with-input-from-pipe
(conc "ssh " remote-host " cat /proc/loadavg;cat /proc/cpuinfo;echo end")
read-lines)
(append
(with-input-from-file "/proc/loadavg"
read-lines)
(with-input-from-file "/proc/cpuinfo"
read-lines)
(list "end"))))
(load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
(proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
(core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
(phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
(max-num (lambda (p n)(max (string->number p) n))))
;; (print "data=" data)
(if (null? data) ;; something went wrong
#f
(let loop ((hed (car data))
(tal (cdr data))
(loads #f)
(proc-num 0) ;; processor includes threads
(phys-num 0) ;; physical chip on motherboard
(core-num 0)) ;; core
;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
(if (null? tal) ;; have all our data, calculate normalized load and return result
(let* ((act-proc (+ proc-num 1))
(act-phys (+ phys-num 1))
(act-core (+ core-num 1))
(adj-proc-load (/ (car loads) act-proc))
(adj-core-load (/ (car loads) act-core)))
(append (list (cons 'adj-proc-load adj-proc-load)
(cons 'adj-core-load adj-core-load))
(list (cons '1m-load (car loads))
(cons '5m-load (cadr loads))
(cons '15m-load (caddr loads)))
(list (cons 'proc act-proc)
(cons 'core act-core)
(cons 'phys act-phys))))
(regex-case
hed
(load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
(proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num))
(phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num))
(core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num)))
(else
(begin
;; (print "NO MATCH: " hed)
(loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))
(define (common:unix-ping hostname)
(let ((res (system (conc "ping -c 1 " hostname " > /dev/null"))))
(eq? res 0)))
;; ideally put all this info into the db, no need to preserve it across moving homehost
;;
(define (common:get-least-loaded-host hosts)
(if (null? hosts)
#f
;;
;; stategy:
;; sort by last-used and normalized-load
;; if last-updated > 15 seconds then re-update
;; take the host with the lowest load with the lowest last-used (i.e. not used for longest time)
;;
(let ((best-host #f)
(curr-time (current-seconds)))
(for-each
(lambda (hostname)
(let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f)))
(if h
h
(let ((h (make-host)))
(hash-table-set! *host-loads* hostname h)
h))))
;; if host hasn't been pinged in 15 sec update it's data
(ping-good (if (< (- curr-time (host-last-update rec)) 15)
(host-reachable rec)
(or (host-reachable rec)
(begin
(host-reachable-set! rec (common:unix-ping hostname))
(host-last-update-set! rec curr-time)
(host-last-cpuload-set! rec (common:get-normalized-cpu-load hostname))
(host-reachable rec))))))
(cond
((not best-host)
(set! best-host hostname))
((and ping-good
(< (alist-ref 'adj-core-load (host-last-cpuload rec))
(alist-ref 'adj-core-load
(host-last-cpuload (hash-table-ref *host-loads* best-host)))))
(set! best-host rec)))))
hosts)
best-host)))
(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
(let* ((loadavg (common:get-cpu-load remote-host))
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload numcpus))
(loadjmp (- first next)))
|
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
|
(query fetch-column
(sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
;;======================================================================
;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S
;;======================================================================
;;
;; [host-types]
;; general ssh #{getbgesthost general}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;;
;; [hosts]
;; general cubian xena
;;
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;;
;; [jobtools]
;; launcher bsub
;; # if defined and not "no" flexi-launcher will bypass launcher unless there is no
;; # match.
;; flexi-launcher yes
(define (common:get-launcher configdat testname itempath)
(let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
(if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
(not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
(let* ((launchers (hash-table-ref/default configdat "launchers" '())))
(if (null? launchers)
fallback-launcher
(let loop ((hed (car launchers))
(tal (cdr launchers)))
(let ((patt (car hed))
(host-type (cadr hed)))
(if (tests:match patt testname itempath)
(begin
(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
(let ((launcher (configf:lookup configdat "host-types" host-type)))
(if launcher
launcher
(begin
(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
(if (null? tal)
fallback-launcher
(loop (car tal)(cdr tal)))))))
;; no match, try again
(if (null? tal)
|
|
|
|
>
|
|
>
>
<
|
<
|
>
>
>
>
>
>
|
|
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
|
(query fetch-column
(sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
;;======================================================================
;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S
;;======================================================================
;;
;; [hosts]
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;;
;; [host-types]
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;;
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;;
;; [jobtools]
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; flexi-launcher yes
;; launcher nbfake
;;
(define (common:get-launcher configdat testname itempath)
(let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
(if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
(not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
(let* ((launchers (hash-table-ref/default configdat "launchers" '())))
(if (null? launchers)
fallback-launcher
(let loop ((hed (car launchers))
(tal (cdr launchers)))
(let ((patt (car hed))
(host-type (cadr hed)))
(if (tests:match patt testname itempath)
(begin
(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
(let ((launcher (configf:lookup configdat "host-types" host-type)))
(if launcher
(let* ((launcher-parts (string-split launcher))
(launcher-exe (car launcher-parts)))
(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
(let ((targ-host (common:get-least-loaded-host (cdr launcher-parts))))
(conc "remrun " targ-host))
launcher))
(begin
(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
(if (null? tal)
fallback-launcher
(loop (car tal)(cdr tal)))))))
;; no match, try again
(if (null? tal)
|