Overview
Comment: | converging not-important changes. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-plus-ulex |
Files: | files | file ages | folders |
SHA1: |
3b0211bc015b5a4ecb19ce7cd8823b48 |
User & Date: | matt on 2020-11-04 21:04:54 |
Other Links: | branch diff | manifest | tags |
Context
2020-11-04
| ||
22:25 | More minor changes melded across (before the big stuff). Leaf check-in: fbb87e6b53 user: matt tags: v1.65-plus-ulex (unpublished) | |
21:04 | converging not-important changes. check-in: 3b0211bc01 user: matt tags: v1.65-plus-ulex (unpublished) | |
2020-08-22
| ||
14:14 | Merged in partial content from v1.66-captain-ulex work. ==7.2/0.9/1201/mars== check-in: 070ebd2d0d user: matt tags: v1.65-cleanup | |
Changes
Modified common.scm from [d232b0060b] to [d95f6303fc].
︙ | ︙ | |||
47 48 49 50 51 52 53 | ;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . ;; arguments - thunk, message (define (common:fail-safe thunk warning-message-on-exception) (handle-exceptions exn (begin | | > | > | | 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | ;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . ;; arguments - thunk, message (define (common:fail-safe thunk warning-message-on-exception) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn) (debug:print-info 0 *default-log-port* (string-substitute "\n?Error:" "nonfatal condition:" (with-output-to-string (lambda () (print-error-message exn) )))) (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") #f) (thunk))) (define getenv get-environment-variable) (define (safe-setenv key val) (if (or (substring-index "!" key) (substring-index ":" key) ;; variables containing : are for internal use and cannot be environment variables. (substring-index "." key)) ;; periods are not allowed in environment variables (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\" or starting with \"!\"") (if (and (string? val) (string? key)) (handle-exceptions exn (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val ", exn=" exn) (setenv key val)) (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)))) (define home (getenv "HOME")) (define user (getenv "USER")) |
︙ | ︙ | |||
504 505 506 507 508 509 510 | (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) (handle-exceptions exn (begin | | | 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 | (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) (handle-exceptions exn (begin (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (print-call-chain (current-error-port)) ;; ) (let* ((fullname (conc "logs/" file)) (mod-time (file-modification-time fullname)) (file-age (- (current-seconds) mod-time)) (file-old (> file-age (* 48 60 60))) |
︙ | ︙ | |||
539 540 541 542 543 544 545 | (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) (file-exists? fullname)) ;; just in case it was gzipped - will get it next time (handle-exceptions exn #f (if (directory? fullname) (begin | | | 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 | (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) (file-exists? fullname)) ;; just in case it was gzipped - will get it next time (handle-exceptions exn #f (if (directory? fullname) (begin (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") (inc-stat "directories")) (begin (delete-file* fullname) (inc-stat "deleted"))) (hash-table-delete! all-files file))))))) '() "logs") |
︙ | ︙ | |||
563 564 565 566 567 568 569 | (lambda (a b) (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) (- num-logs max-allowed)))) (for-each (lambda (file) (let* ((fullname (conc "logs/" file))) (if (directory? fullname) | | | | 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 | (lambda (a b) (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) (- num-logs max-allowed)))) (for-each (lambda (file) (let* ((fullname (conc "logs/" file))) (if (directory? fullname) (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") (handle-exceptions exn (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) (delete-file* fullname))))) files) (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; Do NOT check if not on homehost! ;; |
︙ | ︙ | |||
593 594 595 596 597 598 599 | ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") (handle-exceptions exn (begin | | | 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 | ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") (handle-exceptions exn (begin (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) (exit 1)) (common:cleanup-db dbstruct))) ((not (common:file-exists? mtconf)) (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") (exit 1)) |
︙ | ︙ | |||
704 705 706 707 708 709 710 | (define (common:read-encoded-string instr) (handle-exceptions exn (handle-exceptions exn (begin | | | 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 | (define (common:read-encoded-string instr) (handle-exceptions exn (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again |
︙ | ︙ | |||
942 943 944 945 946 947 948 | (define (common:get-db-tmp-area . junk) (if *db-cache-path* *db-cache-path* (if *toppath* ;; common:get-create-writeable-dir (handle-exceptions exn (begin | | | 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 | (define (common:get-db-tmp-area . junk) (if *db-cache-path* *db-cache-path* (if *toppath* ;; common:get-create-writeable-dir (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) (exit 1)) (let ((dbpath (common:get-create-writeable-dir (list (conc "/tmp/" (current-user-name) "/megatest_localdb/" (common:get-testsuite-name) "/" (string-translate *toppath* "/" ".")))))) ;; #t)))) (set! *db-cache-path* dbpath) |
︙ | ︙ | |||
1187 1188 1189 1190 1191 1192 1193 | (tal (cdr dirs))) (let ((res (or (and (directory? hed) (file-write-access? hed) hed) (handle-exceptions exn (begin | | > | 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 | (tal (cdr dirs))) (let ((res (or (and (directory? hed) (file-write-access? hed) hed) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road. exn=" exn) #f) (create-directory hed #t))))) (if (and (string? res) (directory? res)) res (if (null? tal) #f |
︙ | ︙ | |||
1331 1332 1333 1334 1335 1336 1337 | ;; does the directory exist and do we have write access? ;; ;; returns the directory or #f ;; (define (common:directory-writable? path-string) (handle-exceptions | | > > | | 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 | ;; does the directory exist and do we have write access? ;; ;; returns the directory or #f ;; (define (common:directory-writable? path-string) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "Failed to identify access to " path-string ", exn=" exn) #f) (if (and (directory-exists? path-string) (file-write-access? path-string)) path-string #f))) (define (common:get-linktree) (or (getenv "MT_LINKTREE") |
︙ | ︙ | |||
1425 1426 1427 1428 1429 1430 1431 | ;; first look in config, then look in file .homehost, create it if not found (homehost (or (configf:lookup *configdat* "server" "homehost" ) (handle-exceptions exn (if (> trynum 0) (let ((delay-time (* (- 5 trynum) 5))) (mutex-unlock! *homehost-mutex*) | | > > | > > | 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 | ;; first look in config, then look in file .homehost, create it if not found (homehost (or (configf:lookup *configdat* "server" "homehost" ) (handle-exceptions exn (if (> trynum 0) (let ((delay-time (* (- 5 trynum) 5))) (mutex-unlock! *homehost-mutex*) (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (thread-sleep! delay-time) (common:get-homehost trynum: (- trynum 1))) (begin (mutex-unlock! *homehost-mutex*) (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) (exit 1))) (let ((hhf (conc *toppath* "/.homehost"))) (if (common:file-exists? hhf) (with-input-from-file hhf read-line) (if (file-write-access? *toppath*) (begin (with-output-to-file hhf |
︙ | ︙ | |||
1685 1686 1687 1688 1689 1690 1691 | ;;====================================================================== ;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 ;; (define (common:lazy-modification-time fpath) (handle-exceptions exn | > > | > | > > > | | | | | | 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 | ;;====================================================================== ;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 ;; (define (common:lazy-modification-time fpath) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn) 0) (if (file-exists? fpath) (file-modification-time fpath) 0))) ;; find timestamp of newest file associated with a sqlite db file (define (common:lazy-sqlite-db-modification-time fpath) (let* ((glob-list (handle-exceptions exn (begin (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn) `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))) (glob (conc fpath "*")))) (file-list (if (eq? 0 (length glob-list)) '("/no/such/file") glob-list))) (apply max (map common:lazy-modification-time file-list)))) ;; return a nice clean pathname made absolute (define (common:nice-path dir) (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) (if match ;; using ~ for home? (common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match))) (normalize-pathname (if (absolute-pathname? dir) dir (conc (current-directory) "/" dir)))))) ;; make "nice-path" available in config files and the repl (define nice-path common:nice-path) (define (common:read-link-f path) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed. exn=" exn) path) ;; just give up (with-input-from-pipe (conc "/bin/readlink -f " path) (lambda () (read-line))))) (define (get-cpu-load #!key (remote-host #f)) |
︙ | ︙ | |||
1761 1762 1763 1764 1765 1766 1767 | exn (begin (debug:print-info 1 *default-log-port* " removing bad file " fullpath) (delete-file* fullpath) #f) (with-input-from-file fullpath read)) (begin | | > | > > | | | > > | | | | | > > | | | | | | | | | | | | | | | | | | | | | 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 | exn (begin (debug:print-info 1 *default-log-port* " removing bad file " fullpath) (delete-file* fullpath) #f) (with-input-from-file fullpath read)) (begin (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") #f)))) (begin (debug:print 2 *default-log-port* "not reading file " fullpath) #f))) #f)) (define (common:write-cached-info key dtype dat) (if *toppath* (let* ((fulldir (conc *toppath* "/.sysdata")) (fullpath (conc fulldir "/" key "-" dtype ".log"))) (if (not (file-exists? fulldir))(create-directory fulldir #t)) (handle-exceptions exn (begin (debug:print 0 *default-log-path* "failed to write file " fullpath ", exn=" exn) #f) (with-output-to-file fullpath (lambda ()(pp dat))))) #f)) (define (common:raw-get-remote-host-load remote-host) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) #f) ;; more specific handling of errors needed (with-input-from-pipe (conc "ssh " remote-host " cat /proc/loadavg") (lambda ()(list (read)(read)(read)))))) ;; get cpu load by reading from /proc/loadavg, return all three values ;; (define (common:get-cpu-load remote-host) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) '(-99 -99 -99)) (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) (or (common:get-cached-info actual-hostname "cpu-load") (let ((result (if remote-host (map (lambda (res) (if (eof-object? res) 9e99 res)) (with-input-from-pipe (conc "ssh " remote-host " cat /proc/loadavg") (lambda ()(list (read)(read)(read))))) (with-input-from-file "/proc/loadavg" (lambda ()(list (read)(read)(read))))))) (match result ((l1 l2 l3) (if (and (number? l1) (number? l2) (number? l3)) (begin (common:write-cached-info actual-hostname "cpu-load" result) result) '(-1 -1 -1))) ;; -1 is bad result (else '(-2 -2 -2)))))))) ;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads ;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. ;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load ;; (define (common:get-normalized-cpu-load remote-host) (let ((res (common:get-normalized-cpu-load-raw remote-host)) |
︙ | ︙ | |||
2095 2096 2097 2098 2099 2100 2101 | ;; is happening but not constantly report it (if (> (random 100) 75) ;; about 25% of the time (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) (cond ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable (> num-tries 0)) | | > | 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 | ;; is happening but not constantly report it (if (> (random 100) 75) ;; about 25% of the time (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp)) (cond ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable (> num-tries 0)) (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") (thread-sleep! 10) (common:wait-for-cpuload maxload-in numcpus-in waitdelay count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1))) ((and (> first adjmaxload) (> count 0)) (debug:print-info 0 *default-log-port* "server start delayed " adjwait |
︙ | ︙ | |||
2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 | "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" (let ((default '(("tag-expr" . "-tagexpr") ("mode-patt" . "-modepatt") ("run-name" . "-runname") ("contour" . "-contour") ("target" . "-target") ("test-patt" . "-testpatt") ("msg" . "-m") ("log" . "-log") ("start-dir" . "-start-dir") ("new" . "-set-state-status")))) (if (eq? flavor 'switch-symbol) (map (lambda (x) (cons (string->symbol (conc "-" (car x))) (cdr x))) | > > | 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 | "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" (let ((default '(("tag-expr" . "-tagexpr") ("mode-patt" . "-modepatt") ("run-name" . "-runname") ("contour" . "-contour") ("target" . "-target") ("test-patt" . "-testpatt") ("rerun" . "-rerun") ("setvars" . "-setvars") ("msg" . "-m") ("log" . "-log") ("start-dir" . "-start-dir") ("new" . "-set-state-status")))) (if (eq? flavor 'switch-symbol) (map (lambda (x) (cons (string->symbol (conc "-" (car x))) (cdr x))) |
︙ | ︙ | |||
3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 | (launcher-exe (car launcher-parts))) (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) (count 100)) (if targ-host (conc "remrun " targ-host) (if (> count 0) (begin (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) (thread-sleep! (- 101 count)) (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) (- count 1))) (begin (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) | > | 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 | (launcher-exe (car launcher-parts))) (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) (count 100)) (if targ-host (conc "remrun " targ-host) (if (> count 0) (begin (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) (thread-sleep! (- 101 count)) (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) (- count 1))) (begin (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) |
︙ | ︙ | |||
3340 3341 3342 3343 3344 3345 3346 | (define (common:join-backgrounded-threads) ;; may need to trap and ignore exceptions -- dunno how atomic threads are... (for-each (lambda (thread-name) (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) (if thread (handle-exceptions | | > > | | | 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 | (define (common:join-backgrounded-threads) ;; may need to trap and ignore exceptions -- dunno how atomic threads are... (for-each (lambda (thread-name) (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) (if thread (handle-exceptions exn (begin (debug:print 0 *default-log-port* "joining threads failed. exn=" exn) #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception (thread-join! thread)) ))) (hash-table-keys *common:thread-punchlist*))) (define *common:telemetry-log-state* 'startup) (define *common:telemetry-log-socket* #f) (define (common:telemetry-log-open) |
︙ | ︙ |