Overview
Comment: | sync the trunk with 1.65 latest |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
e13fd02294b69a14a650d1e69467ba2a |
User & Date: | matt on 2020-02-19 21:16:05 |
Other Links: | manifest | tags |
Context
2020-08-03
| ||
14:40 | Merging v1.65 to trunk check-in: 94cb260871 user: mrwellan tags: trunk | |
2020-05-31
| ||
22:34 | Merged v1.66 to trunk - it is now the stable leading edge. check-in: ea3cea484e user: mrwellan tags: trunk-v1.66 | |
2020-02-19
| ||
21:16 | sync the trunk with 1.65 latest check-in: e13fd02294 user: matt tags: trunk | |
2020-02-12
| ||
12:40 | Fixed modulo typo check-in: 7458dc962f user: jmoon18 tags: v1.65, v.16543 | |
2019-12-08
| ||
16:46 | Merged long-running v1.65 branch to trunk check-in: a11b626717 user: matt tags: trunk | |
Changes
Modified common.scm from [4250e720f5] to [22542b9de5].
︙ | |||
1671 1672 1673 1674 1675 1676 1677 | 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 | - + - + + - + + | ;; (car load-res)) ;; cpu-load)) ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 5)) |
︙ | |||
1742 1743 1744 1745 1746 1747 1748 | 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 | - + | (else default)))) (define (common:get-normalized-cpu-load-raw remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost (or (common:get-cached-info actual-host "normalized-load") (let ((data (if remote-host (with-input-from-pipe |
︙ | |||
1764 1765 1766 1767 1768 1769 1770 | 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 | - + | #f (let loop ((hed (car data)) (tal (cdr data)) (loads #f) (proc-num 0) ;; processor includes threads (phys-num 0) ;; physical chip on motherboard (core-num 0)) ;; core |
︙ | |||
1917 1918 1919 1920 1921 1922 1923 | 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + | (if new-best (begin ;; found a host, return it (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) (host-last-used-set! rec curr-time) new-best) (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) |
︙ |
Modified megatest-version.scm from [3dd6b10f75] to [8e7922af4c].
︙ | |||
16 17 18 19 20 21 22 | 16 17 18 19 20 21 22 23 | - + | ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) |
Modified rmt.scm from [fff4f6b45b] to [8c67a7dcdf].
︙ | |||
20 21 22 23 24 25 26 | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | - + - + | (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (include "common_records.scm") |
︙ | |||
56 57 58 59 60 61 62 | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | - + + + + + + + + + + + + | (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected |
︙ | |||
101 102 103 104 105 106 107 | 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | - - - + + + | (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } |
︙ | |||
929 930 931 932 933 934 935 | 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + | (define (rmt:test-set-archive-block-id run-id test-id archive-block-id) (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) (define (rmtmod:calc-ro-mode runremote *toppath*) (if (and runremote (remote-ro-mode-checked runremote)) (remote-ro-mode runremote) (let* ((dbfile (conc *toppath* "/megatest.db")) (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future (if runremote (begin (remote-ro-mode-set! runremote ro-mode) (remote-ro-mode-checked-set! runremote #t) ro-mode) ro-mode)))) (define (extras-readonly-mode rmt-mutex log-port cmd params) (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 3") (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) #f) (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (mutex-lock! *rmt-mutex*) (remote-conndat-set! runremote #f) (http-transport:close-connections area-dat: runremote) (remote-server-url-set! runremote #f) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) (define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) (if (and (vector? res) (eq? (vector-length res) 2) (eq? (vector-ref res 1) 'overloaded)) ;; since we are ;; looking at the ;; data to carry the ;; error we'll use a ;; fairly obtuse ;; combo to minimise ;; the chances of ;; some sort of ;; collision. this ;; is the case where ;; the returned data ;; is bad or the ;; server is ;; overloaded and we ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") (mutex-lock! *rmt-mutex*) (http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res |
Modified rmtmod.scm from [8f5d01960e] to [7098937896].
︙ | |||
54 55 56 57 58 59 60 | 54 55 56 57 58 59 60 61 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | (set! debug:print dbgp) (set! debug:print-info dbgpinfo) (set! remote-ro-mode ro-mode) (set! remote-ro-mode-set! ro-mode-set) (set! remote-ro-mode-checked-set! ro-mode-checked-set) (set! remote-ro-mode-checked ro-mode-checked)) |
Modified runs.scm from [81684437ca] to [650378342c].
︙ | |||
1015 1016 1017 1018 1019 1020 1021 | 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 | - | (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) '()))) (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! (not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner |
︙ | |||
1117 1118 1119 1120 1121 1122 1123 1124 1125 | 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 | + + + - - + + + + | (and (member 'toplevel testmode) ;; 'toplevel) (null? non-completed) (not (member 'exclusive testmode))))) ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing ;; jobtools maxload is useful for where the full Megatest run is done on one machine |
︙ | |||
1741 1742 1743 1744 1745 1746 1747 1748 1749 | 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 | + + + + + - - - - - - + + + + + + + + + + + + + - | (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step ;; (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader ;; ;; NEED to reprocess testconfig here, ensuring that item variables are available. ;; This is for Tal's issue with item-specific env vars not being set for use in skip. ;; HSD https://hsdes.intel.com/appstore/icf/index.html#/article?articleId=1408763273 ;; (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) |
︙ | |||
1879 1880 1881 1882 1883 1884 1885 | 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 | + - - - - + + + + + + - - - - + + + + + + - + | ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) ;; run-ids = #f means *all* runs (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ;; split the string and OR of file-exists? |
︙ | |||
1938 1939 1940 1941 1942 1943 1944 | 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 | - + | ;; (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))) (else (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) (else |
︙ |
Modified server.scm from [78810e8804] to [b1a1de6e53].
| 1 2 3 4 5 6 7 | - |
|
︙ | |||
149 150 151 152 153 154 155 | 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | - + + | (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time |
︙ |