︙ | | |
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
-
+
|
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see <http://www.gnu.org/licenses/>.
;;======================================================================
(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
format dot-locking csv-xml z3 ;; sql-de-lite
format dot-locking csv-xml z3 udp ;; sql-de-lite
hostinfo md5 message-digest typed-records directory-utils stack
matchable regex posix (srfi 18) extras ;; tcp
(prefix nanomsg nmsg:)
(prefix sqlite3 sqlite3:)
pkts (prefix dbi dbi:)
)
|
︙ | | |
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
+
|
;; Count the open file descriptors of a process by listing /proc/<pid>/fd.
;;
;;   pid - keyword arg, process id to inspect; defaults to the current process.
;;
;; Returns a two-element list: (total-fd-count socket-fd-count).
;;
(define (get-file-descriptor-count #!key (pid (current-process-id )))
  ;; Glob only once so both counts are derived from the same listing of the
  ;; fd directory (two separate globs could observe different fd tables).
  (let ((fd-paths (glob (conc "/proc/" pid "/fd/*"))))
    (list
     (length fd-paths)
     (length (filter socket? fd-paths)))))
;; GLOBALS
;; CONTEXTS
;; cxt: context record declared with defstruct.
;;   taskdb - defaults to #f
;;   cmutex - defaults to the result of (make-mutex)
;;            NOTE(review): presumably guards access to this context -- confirm against the users of cxt-cmutex
(defstruct cxt
(taskdb #f)
(cmutex (make-mutex)))
|
︙ | | |
335
336
337
338
339
340
341
342
343
344
345
346
347
348
|
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
;; (if full
'(dejunk)
;; '())
)
(if (common:api-changed?)
(common:set-last-run-version)))
;; Maintain a small set of rolling gzip snapshots of a file.
;;
;; The snapshots are stored in <directory of file>/<subdir>/ and are named
;; <basename>.mins.gz, <basename>.hrs.gz, <basename>.days.gz and
;; <basename>.weeks.gz.
;;
;;   filepath - the file to snapshot
;;   subdir   - keyword arg, directory (relative to the directory of the
;;              file) holding the snapshots; defaults to "."
;;
;; Returns #f if filepath does not exist or if any of the shell commands
;; failed, #t otherwise. After the first failing command every later command
;; of the same call is skipped (and logged as skipped).
;;
(define (common:snapshot-file filepath #!key (subdir ".") )
  (if (file-exists? filepath)
      (let* ((age-sec  (lambda (file)
                         (if (file-exists? file)
                             (- (current-seconds) (file-modification-time file))
                             1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist.
             (ok-flag  #t) ;; cleared by docmd on the first failing command
             (age-mins (lambda (file) (/ (age-sec file) 60)))
             (age-hrs  (lambda (file) (/ (age-mins file) 60)))
             (age-days (lambda (file) (/ (age-hrs file) 24)))
             (age-wks  (lambda (file) (/ (age-days file) 7)))
             ;; run a shell command unless an earlier one already failed
             (docmd    (lambda (cmd)
                         (cond
                          (ok-flag
                           (let ((res (system cmd)))
                             (cond
                              ((eq? 0 res)
                               #t)
                              (else
                               (set! ok-flag #f)
                               (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code "
                                            ;; `system' hands back the raw wait status: for a normal
                                            ;; exit the low byte is 0 and the exit code is status/256.
                                            ;; Anything else (negative value, killed by a signal) is
                                            ;; reported as the raw status.
                                            (if (or (< res 0)
                                                    (not (zero? (modulo res 256))))
                                                res
                                                (quotient res 256))
                                            " ["cmd"]" )
                               #f))))
                          (else
                           (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]")
                           #f))))
             (copy     (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'"))))
             (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'"))))
             (fullpath (realpath filepath))
             (basedir  (pathname-directory fullpath))
             (basefile (pathname-strip-directory fullpath))
             ;;(prevfile (conc filepath ".prev.gz"))
             (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz"))
             (hrsfile  (conc basedir "/" subdir "/" basefile ".hrs.gz"))
             (daysfile (conc basedir "/" subdir "/" basefile ".days.gz"))
             (wksfile  (conc basedir "/" subdir "/" basefile ".weeks.gz")))

        ;; create subdir if it does not exist
        (if (not (directory-exists? (conc basedir "/" subdir)))
            (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'")))

        ;; copy&zip <file> to <file>.mins.gz if it does not exist
        (if (not (file-exists? minsfile))
            (copy+zip filepath minsfile))
        ;; copy <file>.mins.gz to <file>.hrs.gz if it does not exist
        (if (not (file-exists? hrsfile))
            (copy minsfile hrsfile))
        ;; copy <file>.hrs.gz to <file>.days.gz if it does not exist
        (if (not (file-exists? daysfile))
            (copy hrsfile daysfile))
        ;; copy <file>.days.gz to <file>.weeks.gz if it does not exist
        (if (not (file-exists? wksfile))
            (copy daysfile wksfile))

        ;; if age(<file>.mins.gz) >= 1 minute:
        ;;   copy <file>.mins.gz <file>.hrs.gz
        ;;   zip <file> into <file>.mins.gz
        ;; NOTE(review): the earlier comment here said ">= 1h" but the test has
        ;; always been on age-mins; confirm which threshold is intended.
        (when (>= (age-mins minsfile) 1)
          (copy minsfile hrsfile)
          (copy+zip filepath minsfile))

        ;; if age(<file>.hrs.gz) >= 1d:
        ;;   copy <file>.hrs.gz <file>.days.gz
        ;;   copy <file>.mins.gz <file>.hrs.gz
        (when (>= (age-days hrsfile) 1)
          (copy hrsfile daysfile)
          (copy minsfile hrsfile))

        ;; if age(<file>.days.gz) >= 1w:
        ;;   copy <file>.days.gz <file>.weeks.gz
        ;;   copy <file>.hrs.gz <file>.days.gz
        (when (>= (age-wks daysfile) 1)
          (copy daysfile wksfile)
          (copy hrsfile daysfile))

        ;; report whether every command that was attempted succeeded
        ok-flag)
      #f))
;; Rotate logs, logic:
;; if > 500k and older than 1 week:
;; remove previous compressed log and compress this log
;; WARNING: This proc operates assuming that it is in the directory above the
;; logs directory you wish to log-rotate.
;;
(define (common:rotate-logs)
|
︙ | | |
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
|
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
|
+
+
+
+
+
+
-
-
|
;; (and ohh srv)))
;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
;; Global number initialised to 0, and a mutex defined alongside it.
;; NOTE(review): presumably *wdnum*mutex serialises updates to *wdnum*; the users are not visible here -- confirm.
(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; Return the current local time as a string of the form
;; "YYYY-MM-DD HH:MM:SS" (strftime format "%Y-%m-%d %H:%M:%S").
(define (common:human-time)
  (let* ((now-secs  (current-seconds))
         (now-local (seconds->local-time now-secs)))
    (time->string now-local "%Y-%m-%d %H:%M:%S")))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
(thread-sleep! 0.05) ;; delay for startup
(debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
;; sync megatest.db to /tmp/.../megatest.db
(let* ((sync-cool-off-duration 3)
(golden-mtdb (dbr:dbstruct-mtdb dbstruct))
(golden-mtpath (db:dbdat-get-path golden-mtdb))
|
︙ | | |
799
800
801
802
803
804
805
806
807
808
809
810
811
812
|
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
|
+
|
(debug:print-info 13 *default-log-port* "loading writable-watchdog.")
(server:writable-watchdog dbstruct)))
(debug:print-info 13 *default-log-port* "watchdog done."))
(debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
(define (std-exit-procedure)
;;(common:telemetry-log-close)
(on-exit (lambda () 0))
;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
(let ((no-hurry (if *time-to-exit* ;; hurry up
#f
(begin
(set! *time-to-exit* #t)
#t))))
|
︙ | | |
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
|
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
|
-
+
-
+
|
;; first look in config, then look in file .homehost, create it if not found
(homehost (or (configf:lookup *configdat* "server" "homehost" )
(handle-exceptions
exn
(if (> trynum 0)
(let ((delay-time (* (- 5 trynum) 5)))
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn))
(thread-sleep! delay-time)
(common:get-homehost trynum: (- trynum 1)))
(begin
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn))
(exit 1)))
(let ((hhf (conc *toppath* "/.homehost")))
(if (common:file-exists? hhf)
(with-input-from-file hhf read-line)
(if (file-write-access? *toppath*)
(begin
(with-output-to-file hhf
|
︙ | | |
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
|
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
|
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
|
exn
#f
(with-output-to-file fullpath (lambda ()(pp dat))))))
;; get cpu load by reading from /proc/loadavg, return all three values
;;
;; Return the three load averages of a host as a list of three values read
;; from /proc/loadavg.
;;
;;   remote-host - host name to query with "ssh <host> cat /proc/loadavg",
;;                 or #f to read /proc/loadavg on the local machine.
;;
;; A value found by common:get-cached-info under the key "cpu-load" for the
;; host is returned as-is; otherwise the freshly read value is stored with
;; common:write-cached-info before it is returned.
;; For a remote read, any field that comes back as end-of-file is replaced
;; by 9e99. If anything raises an exception the result is '(99 99 99).
;;
(define (common:get-cpu-load remote-host)
  (handle-exceptions
   exn
   '(99 99 99)
   (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
     (or (common:get-cached-info actual-hostname "cpu-load")
         (let ((result (if remote-host
                           (map (lambda (res)
                                  (if (eof-object? res) 9e99 res))
                                (with-input-from-pipe
                                 (conc "ssh " remote-host " cat /proc/loadavg")
                                 (lambda ()(list (read)(read)(read)))))
                           (with-input-from-file "/proc/loadavg"
                             (lambda ()(list (read)(read)(read)))))))
           (common:write-cached-info actual-hostname "cpu-load" result)
           result)))))
;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
(let ((res (common:get-normalized-cpu-load-raw remote-host))
|
︙ | | |
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
|
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
'()))
;; clear vars matching pattern, run proc, set vars back
;; if proc is a string run that string as a command with
;; system.
;;
;; The "original" environment as an alist of (name . value) pairs.
;;
;; When MT_ORIG_ENV is set, its value is base64-decoded, decompressed with
;; z3:decode-buffer and parsed with `read'. Otherwise the current environment
;; is used with every variable whose name matches "^MT_.*" dropped.
;;
(define *common:orig-env*
  (let ((all-vars (get-environment-variables))
        (packed   (get-environment-variable "MT_ORIG_ENV")))
    (cond
     (packed
      (with-input-from-string
          (z3:decode-buffer (base64:base64-decode packed))
        read))
     (else
      (remove (lambda (name+val)
                (string-match "^MT_.*" (car name+val)))
              all-vars)))))
;; Run proc with the process environment temporarily replaced by
;; *common:orig-env*, then put the environment of the caller back.
;;
;;   proc - a string, which is run as a shell command with `system', or a
;;          thunk, which is called with no arguments. A false value runs
;;          nothing.
;;
;; Returns the result of `system' or of the thunk.
;; The swap is done with dynamic-wind so that the environment of the caller
;; is restored even when proc raises an exception.
;;
(define (common:with-orig-env proc)
  (let ((current-env (get-environment-variables)))
    (dynamic-wind
        (lambda ()
          ;; install the original environment
          (for-each (lambda (x) (unsetenv (car x))) current-env)
          (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*))
        (lambda ()
          (cond
           ((string? proc)(system proc))
           (proc          (proc))))
        (lambda ()
          ;; restore the environment that was active on entry
          (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*)
          (for-each (lambda (x) (setenv (car x) (cdr x))) current-env)))))
(define (common:without-vars proc . var-patts)
(let ((vars (make-hash-table)))
(for-each
(lambda (vardat) ;; each env var
(for-each
(lambda (var-patt)
(if (string-match var-patt (car vardat))
|
︙ | | |
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
|
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
|
-
+
-
-
-
+
+
+
+
|
(hash-table-for-each
vars
(lambda (var val)
(setenv var val)))
vars))
;; Run a shell command, wrapped with the pre/post command fragments returned
;; by dtests:get-pre-command and dtests:get-post-command. When neither is
;; set the command is prefixed with "viewscreen ".
;;
;;   cmd           - the command string
;;   with-vars     - keyword arg; when true run via common:without-vars with
;;                   no patterns, i.e. no variables are filtered
;;   with-orig-env - keyword arg; when true (and with-vars is false) run via
;;                   common:with-orig-env
;; With neither flag the command is run via common:without-vars with the
;; pattern "MT_.*".
;;
;; Returns whatever the selected runner returns.
;;
(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f))
  (let* ((pre-cmd  (dtests:get-pre-command))
         (post-cmd (dtests:get-post-command))
         (fullcmd  (if (or pre-cmd post-cmd)
                       ;; conc would render a false value as the text "#f",
                       ;; so substitute "" for whichever side is missing
                       (conc (or pre-cmd "") cmd (or post-cmd ""))
                       (conc "viewscreen " cmd))))
    (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
    (cond
     (with-vars     (common:without-vars fullcmd))
     (with-orig-env (common:with-orig-env fullcmd))
     (else          (common:without-vars fullcmd "MT_.*")))))
;;======================================================================
;; T I M E A N D D A T E
;;======================================================================
;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
|
︙ | | |
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
|
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
|
+
+
+
+
|
#t)
#f))
;; simple lock. improve and converge on this one.
;;
;; Thin wrapper: passes keyname to rmt:no-sync-get-lock and returns its result.
(define (common:simple-lock keyname)
(rmt:no-sync-get-lock keyname))
;; Thin wrapper: passes keyname to rmt:no-sync-del! and returns its result.
;; The `force' keyword argument is accepted but is not used by this body.
(define (common:simple-unlock keyname #!key (force #f))
(rmt:no-sync-del! keyname))
;;======================================================================
;;
;;======================================================================
;; True when the "-execute" argument was given (per args:get-arg) and the
;; environment variable MT_CMDINFO is set. On success the value returned is
;; the value of MT_CMDINFO; otherwise #f.
(define (common:in-running-test?)
(and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO")))
|
︙ | | |
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
|
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
|
-
+
|
(if all-good
(let ((cmddat (make-qitem
command: command
host-port: host-port
params: params)))
(queue-push cmddat) ;; put request into the queue
(nn-send soc "queued")) ;; reply with "queued"
(print "ERROR: BAD request " dat))
(print "ERROR: ["(common:human-time)"] BAD request " dat))
(loop (nn-recv soc)))))
(nn-close soc)))
;;======================================================================
;; D A S H B O A R D U S E R V I E W S
|
︙ | | |
2933
2934
2935
2936
2937
2938
2939
|
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
(if thread
(handle-exceptions
exn
#t ;; just ignore it, it might have died in the meantime so joining it will throw an exception
(thread-join! thread))
)))
(hash-table-keys *common:thread-punchlist*)))
;; Telemetry logging state, starts as 'startup. The telemetry procedures in
;; this file move it to one of: 'open, 'not-needed, 'broken,
;; 'broken-or-no-server, 'closed, 'closed-fail.
(define *common:telemetry-log-state* 'startup)
;; The udp socket used for telemetry while it is open, #f otherwise.
(define *common:telemetry-log-socket* #f)
;; Open the telemetry udp socket, but only while the state is still 'startup.
;;
;; Host and port come from the [telemetry] section of *configdat*. The state
;; becomes:
;;   'open       - socket opened and connected; it is stored in
;;                 *common:telemetry-log-socket*
;;   'not-needed - host or port is not configured
;;   'broken     - opening or connecting raised an exception
;;
(define (common:telemetry-log-open)
  (when (eq? *common:telemetry-log-state* 'startup)
    (let* ((tele-host (configf:lookup *configdat* "telemetry" "host"))
           (tele-port (configf:lookup-number *configdat* "telemetry" "port"))
           (user-name (or (get-environment-variable "USER") "unknown"))
           (host-name (or (get-environment-variable "HOST") "unknown"))
           (next-state
            (handle-exceptions
             exn
             (begin
               (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
               'broken)
             (cond
              ((and tele-host tele-port user-name host-name)
               (let ((sock (udp-open-socket)))
                 ;;(udp-bind! sock #f 0)
                 (udp-connect! sock tele-host tele-port)
                 (set! *common:telemetry-log-socket* sock)
                 'open))
              (else 'not-needed)))))
      (set! *common:telemetry-log-state* next-state))))
;; Send one telemetry message over the udp socket.
;;
;;   event   - event name placed in the message as "[megatest <event>]";
;;             nothing is sent when it is false
;;   payload - keyword arg, any printable datum; it is pretty-printed,
;;             compressed with z3:encode-buffer and base64 encoded
;;
;; The socket is opened on first use (state 'startup). Nothing is sent unless
;; the state is 'open. If sending raises an exception, telemetry is disabled:
;; the state becomes 'broken-or-no-server and the socket variable is cleared.
;;
(define (common:telemetry-log event #!key (payload '()))
  (if (eq? *common:telemetry-log-state* 'startup)
      (common:telemetry-log-open))

  (if (eq? 'open *common:telemetry-log-state*)
      (handle-exceptions
       exn
       (begin
         (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)")
         ;; set! (not define): the existing global must be updated; a define
         ;; in expression context is not a valid way to assign it
         (set! *common:telemetry-log-state* 'broken-or-no-server)
         (set! *common:telemetry-log-socket* #f))
       (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events
           (let* ((user    (or (get-environment-variable "USER") "unknown"))
                  (host    (or (get-environment-variable "HOST") "unknown"))
                  (start   (conc "[megatest "event"]"))
                  (toppath (or *toppath* "/dev/null"))
                  (payload-serialized
                   (base64:base64-encode
                    (z3:encode-buffer
                     (with-output-to-string (lambda () (pp payload))))))
                  (msg     (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
                                 toppath":"payload-serialized)))
             (udp-send *common:telemetry-log-socket* msg))))))
;; Close the telemetry udp socket.
;;
;; Runs when the state is 'open or 'broken-or-no-server-preclose, or when a
;; socket is still stored in *common:telemetry-log-socket*. The state is set
;; to 'closed before the socket is closed and the socket variable cleared; if
;; closing raises an exception the state becomes 'closed-fail instead.
;;
(define (common:telemetry-log-close)
  (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
    (handle-exceptions
     exn
     (begin
       ;; set! (not define): update the existing global state variable
       (set! *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure"))
     (begin
       (set! *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))
|