︙ | | |
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
|
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see <http://www.gnu.org/licenses/>.
;;======================================================================
(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
format dot-locking csv-xml z3 udp ;; sql-de-lite
hostinfo md5 message-digest typed-records directory-utils stack
matchable regex posix (srfi 18) extras ;; tcp
(prefix nanomsg nmsg:)
(prefix sqlite3 sqlite3:)
pkts (prefix dbi dbi:)
;; (use srfi-1 data-structures posix regex-case (prefix base64 base64:)
;; format dot-locking csv-xml z3 udp ;; sql-de-lite
;; hostinfo md5 message-digest typed-records directory-utils stack
;; matchable regex posix (srfi 18) extras ;; tcp
;; (prefix nanomsg nmsg:)
;; (prefix sqlite3 sqlite3:)
;; pkts (prefix dbi dbi:)
)
(declare (unit common))
;; (declare (uses commonmod))
;; (import commonmod)
(include "common_records.scm")
;; )
;;
;; (declare (unit common))
;; ;; (declare (uses commonmod))
;; ;; (import commonmod)
;;
;; (include "common_records.scm")
;; (require-library margs)
;; (include "margs.scm")
;; (define old-exit exit)
;;
|
︙ | | |
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
+
|
(define *server-kind-run* (make-hash-table))
(define *home-host* #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex* (make-mutex))
(define *api-process-request-count* 0)
(define *max-api-process-requests* 0)
(define *server-overloaded* #f)
(define *writes-total-delay* 0)
;; client
(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex
;; RPC transport
(define *rpc:listener* #f)
|
︙ | | |
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
|
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
-
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
|
(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex* (make-mutex))
;; Miscellaneous
(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers
(define *numcpus-cache* (make-hash-table))
(use posix-extras pathname-expand files)
;; (use posix-extras pathname-expand files)
;; this plugs a hole in posix-extras in recent chicken versions > 4.9)
(let-values (( (chicken-release-number chicken-major-version)
(apply values
(map string->number
(take
(string-split (chicken-version) ".")
2)))))
(let ((resolve-pathname-broken?
(or (> chicken-release-number 4)
(and (eq? 4 chicken-release-number) (> chicken-major-version 9)))))
(if resolve-pathname-broken?
(define ##sys#expand-home-path pathname-expand))))
;; (let-values (( (chicken-release-number chicken-major-version)
;; (apply values
;; (map string->number
;; (take
;; (string-split (chicken-version) ".")
;; 2)))))
;; (let ((resolve-pathname-broken?
;; (or (> chicken-release-number 4)
;; (and (eq? 4 chicken-release-number) (> chicken-major-version 9)))))
;; (if resolve-pathname-broken?
;; (define ##sys#expand-home-path pathname-expand))))
(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) ))
;; (define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) ))
;; (define (realpath x)(pathname-expand (or x "/dev/null")) )
(define (realpath x)
(with-input-from-pipe
(string-append "readlink -f \""x"\"")
read-line))
(define (common:get-this-exe-fullpath #!key (argv (argv)))
(let* ((this-script
(cond
((and (> (length argv) 2)
(string-match "^(.*/csi|csi)$" (car argv))
(string-match "^-(s|ss|sx|script)$" (cadr argv)))
|
︙ | | |
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
|
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
|
-
+
|
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
(if (common:on-homehost?)
(if (common:api-changed?)
(let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
(dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
(read-only (not (file-write-access? dbfile)))
(read-only (not (file-writable? dbfile)))
(dbstruct (db:setup #t)))
(debug:print 0 *default-log-port*
"WARNING: Version mismatch!\n"
" expected: " (common:version-signature) "\n"
" got: " (common:get-last-run-version))
(cond
((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
|
︙ | | |
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
|
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
|
-
+
|
;;
(define (common:get-create-writeable-dir dirs)
(if (null? dirs)
#f
(let loop ((hed (car dirs))
(tal (cdr dirs)))
(let ((res (or (and (directory? hed)
(file-write-access? hed)
(file-writable? hed)
hed)
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "could not create " hed
", this might cause problems down the road. exn=" exn)
#f)
|
︙ | | |
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
|
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
|
-
+
|
(define (common:directory-writable? path-string)
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "Failed to identify access to " path-string ", exn=" exn)
#f)
(if (and (directory-exists? path-string)
(file-write-access? path-string))
(file-writable? path-string))
path-string
#f)))
(define (common:get-linktree)
(or (getenv "MT_LINKTREE")
(if *configdat*
(configf:lookup *configdat* "setup" "linktree")
|
︙ | | |
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
|
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
|
-
+
|
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
((condition-property-accessor 'exn 'message) exn))
(exit 1)))
(let ((hhf (conc *toppath* "/.homehost")))
(if (common:file-exists? hhf)
(with-input-from-file hhf read-line)
(if (file-write-access? *toppath*)
(if (file-writable? *toppath*)
(begin
(with-output-to-file hhf
(lambda ()
(print bestadrs)))
(begin
(mutex-unlock! *homehost-mutex*)
(car (common:get-homehost))))
|
︙ | | |
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
|
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
|
-
+
|
(if *toppath*
(let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))
(delfile (lambda (exn)
(debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn)
(delete-file* fullpath)
#f)))
(if (and (file-exists? fullpath)
(file-read-access? fullpath))
(file-readable? fullpath))
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn)
#f)
(debug:print 2 *default-log-port* "reading file " fullpath)
(let ((real-age (- (current-seconds)
|
︙ | | |
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
|
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
|
-
+
|
(hh (if hh-dat (car hh-dat) #f)))
(common:wait-for-normalized-load maxnormload msg hh)))
(define (common:get-num-cpus remote-host)
(let* ((actual-host (or remote-host (get-host-name))))
;; hosts had better not be changing the number of cpus too often!
(or (hash-table-ref/default *numcpus-cache* actual-host #f)
(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (pseudo-random-integer 3600)))
(let* ((proc (lambda ()
(let loop ((numcpu 0)
(inl (read-line)))
(if (eof-object? inl)
(if (> numcpu 0)
numcpu
#f) ;; if zero return #f so caller knows that things are not working
|
︙ | | |
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
|
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
|
-
+
|
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
(let ((num-cpus (common:get-num-cpus remote-host)))
(if num-cpus
(common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host)
(begin
(thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again
(thread-sleep! (pseudo-random-integer 60)) ;; we failed to get num cpus. wait a bit and try again
(if (> rem-tries 0)
(common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1))
#f)))))
;;======================================================================
;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;; count - count down to zero, at some point we'd give up if the load never drops
|
︙ | | |
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
|
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
|
-
+
|
", normalized effective load: " normalized-effective-load
))
;; overloaded and count expired (i.e. went to zero)
(else
(if (> num-tries 0) ;; should be "num-tries-left".
(if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host))
(debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of "
effective-normalized-load " continuing."))
normalized-effective-load " continuing."))
(debug:print 0 *default-log-port* "Load on " effective-host ", "
first" could not be retrieved. Giving up and continuing."))))))
;;======================================================================
;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;;
;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5))
|
︙ | | |
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
|
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
|
-
+
-
+
|
;; ;; at least use 1
;; (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next?
;; 0
;; next))) ;; we will force a conservative calculation any time next is large.
;; (first-next-avg (/ (+ first next) 2))
;; ;; add some randomness to the time to break any alignment
;; ;; where netbatch dumps many jobs to machines simultaneously
;; (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)
;; (adjwait (min (+ 300 (pseudo-random-integer 10)) (abs (* (+ (pseudo-random-integer 10)
;; (/ (- 1000 count) 10)
;; waitdelay)
;; (- first adjmaxload) ))))
;; (load-jump-limit (configf:lookup-number *configdat* "setup" "load-jump-limit"))
;; ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit
;; ;; etc.
;; (effective-load (common:get-intercept first next))
;; (effective-host (or remote-host "localhost"))
;; (normalized-effective-load (/ effective-load numcpus))
;; (will-wait (> normalized-effective-load maxload)))
;;
;; ;; let's let the user know once in a long while that load checking
;; ;; is happening but not constantly report it
;; #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time
;; #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (pseudo-random-integer 100) 75) ;; about 25% of the time
;; (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
;; ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
;;
;; (debug:print-info 1 *default-log-port*
;; "On host: " effective-host
;; ", effective load: " effective-load
;; ", numcpus: " numcpus
|
︙ | | |
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
|
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
|
-
+
-
+
|
(lambda (disk-num)
(let* ((dirpath (cadr (assoc disk-num disks)))
(freespc (cond
((not (directory? dirpath))
(if (common:low-noise-print 300 "disks not a dir " disk-num)
(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
-1)
((not (file-write-access? dirpath))
((not (file-writable? dirpath))
(if (common:low-noise-print 300 "disks not writeable " disk-num)
(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
-1)
((not (eq? (string-ref dirpath 0) #\/))
(if (common:low-noise-print 300 "disks not a proper path " disk-num)
(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
-1)
(else
(get-df dirpath))))
(free-inodes (cond
((not (directory? dirpath))
(if (common:low-noise-print 300 "disks not a dir " disk-num)
(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
-1)
((not (file-write-access? dirpath))
((not (file-writable? dirpath))
(if (common:low-noise-print 300 "disks not writeable " disk-num)
(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
-1)
((not (eq? (string-ref dirpath 0) #\/))
(if (common:low-noise-print 300 "disks not a proper path " disk-num)
(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
-1)
|
︙ | | |
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
|
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
|
-
+
|
(for-each
(lambda (pktsdir) ;; look at all
(cond
((not (common:file-exists? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist."))
((not (directory? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
((not (file-read-access? pktsdir))
((not (file-readable? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
(else
(debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
(let ((pkts (glob (conc pktsdir "/*.pkt"))))
(for-each
(lambda (pkt)
(let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
|
︙ | | |
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
|
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
(begin
(debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
#t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
(thread-join! thread))
)))
(hash-table-keys *common:thread-punchlist*)))
(define (dtests:get-pre-command #!key (default-override #f))
(let* ((orig-pre-command "export CMD='")
(viewscreen-pre-command "viewscreen ")
(use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen"))
(default-pre-command (if use-viewscreen viewscreen-pre-command orig-pre-command))
(cfg-ovrd (configf:lookup *configdat* "dashboard" "pre-command")))
(or cfg-ovrd default-override default-pre-command))) ;; "xterm -geometry 180x20 -e \""))
(define (dtests:get-post-command #!key (default-override #f))
(let* ((orig-post-command (conc "';xterm -geometry 180x20 -e \"(echo; echo -n START:;date +ww%U.%w-$H:%M:%S;echo;echo $CMD;echo;$CMD)|&"
"tee -a runlog-`date +ww%U.%w-%H:%M`.log;echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))
(viewscreen-post-command "")
(use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen"))
(default-post-command (if use-viewscreen viewscreen-post-command orig-post-command))
(cfg-ovrd (configf:lookup *configdat* "dashboard" "post-command")))
(or cfg-ovrd default-override default-post-command))) ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
;;======================================================================
;; (define *common:telemetry-log-state* 'startup)
;; (define *common:telemetry-log-socket* #f)
;;
;; (define (common:telemetry-log-open)
;; (if (eq? *common:telemetry-log-state* 'startup)
;; (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
|
︙ | | |