Overview
Comment: | Added waits to steps, get rid of defunct server stuff (let them die natural deaths) ==7.34/1.5/WARN/1202== |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-cleanup |
Files: | files | file ages | folders |
SHA1: |
f22d7f8cb6542bd9c3314590ae84720b |
User & Date: | mrwellan on 2020-08-29 09:22:21 |
Original Comment: | Added waits to steps, get rid of defunct server stuff (let them die natural deaths) |
Other Links: | branch diff | manifest | tags |
Context
2020-08-30
| ||
00:14 | Gate when first detect that all job slots are in use. No point in looping until slots are availble. ==7.1/1.6/WARN/1201/mars== check-in: cd1100885b user: matt tags: v1.65-cleanup | |
2020-08-29
| ||
09:22 | Added waits to steps, get rid of defunct server stuff (let them die natural deaths) ==7.34/1.5/WARN/1202== check-in: f22d7f8cb6 user: mrwellan tags: v1.65-cleanup | |
06:30 | force server exit only if [setup] runtime is set ==3.7/1.0/PASS/mars== check-in: 7951d54072 user: matt tags: v1.65-cleanup | |
Changes
Modified common.scm from [15370e151d] to [3722d67826].
︙ | ︙ | |||
1811 1812 1813 1814 1815 1816 1817 | ;; cpu-load)) ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 10)) (if *toppath* | | > > > > | > > > > > > > > > > | < | | | | > | 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 | ;; cpu-load)) ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 10)) (if *toppath* (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) (delfile (lambda () (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn) (delete-file* fullpath) #f))) (if (and (file-exists? fullpath) (file-read-access? fullpath)) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn) #f) (debug:print 2 *default-log-port* "reading file " fullpath) (let ((real-age (- (current-seconds) (handle-exceptions exn (begin (debug:print 1 *default-log-port* "Failed to read mod time on file " fullpath ", using 0, exn=" exn) 0) (file-change-time fullpath))))) (if (< real-age age) (handle-exceptions exn (delfile) (let* ((res (with-input-from-file fullpath read))) (if (eof-object? res) (begin (delfile) #f) res))) (begin (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") #f)))) (begin (debug:print 2 *default-log-port* "not reading file " fullpath) #f))) #f)) (define (common:write-cached-info key dtype dat) |
︙ | ︙ | |||
2100 2101 2102 2103 2104 2105 2106 2107 2108 | (define (common:wait-for-homehost-load maxnormload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) | > > > | | | | | | | | | | | | | | | | | | | | | > > | 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 | (define (common:wait-for-homehost-load maxnormload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) (define *numcpus-cache* (make-hash-table)) (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; hosts had better not be changing the number of cpus too often! (or (hash-table-ref/default *numcpus-cache* actual-host #f) (let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) (let* ((proc (lambda () (let loop ((numcpu 0) (inl (read-line))) (if (eof-object? inl) (if (> numcpu 0) numcpu #f) ;; if zero return #f so caller knows that things are not working (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line)))))) (result (if remote-host (with-input-from-pipe (conc "ssh " remote-host " cat /proc/cpuinfo") proc) (with-input-from-file "/proc/cpuinfo" proc)))) (if (and (number? result) (> result 0)) (common:write-cached-info actual-host "num-cpus" result)) result)))) (hash-table-set! *numcpus-cache* actual-host numcpus) numcpus)))) ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) (let ((num-cpus (common:get-num-cpus remote-host))) (if num-cpus (common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host) |
︙ | ︙ |
Modified http-transport.scm from [6328aa908d] to [67489ed9ab].
︙ | ︙ | |||
564 565 566 567 568 569 570 | (full-serv-fname (conc *toppath* "/logs/" serv-fname)) (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) (debug:print 0 *default-log-port* msg) (if (common:file-exists? full-serv-fname) (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) (exit))))) | | | | 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 | (full-serv-fname (conc *toppath* "/logs/" serv-fname)) (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) (debug:print 0 *default-log-port* msg) (if (common:file-exists? full-serv-fname) (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) (exit))))) #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago (not server-starting)) (begin (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") (exit))) ;; lets not even bother to start if there are already three or more server files ready to go #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) (if (> num-alive 3) (begin (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) (exit)))) (common:save-pkt `((action . start) (T . server) (pid . ,(current-process-id))) |
︙ | ︙ |
Modified megatest.scm from [06490b6ac8] to [0e58f17e0f].
︙ | ︙ | |||
2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 | (rmt:teststep-set-status! run-id test-id step state status (or comment msg) logfile)) (begin (debug:print-error 0 *default-log-port* "You must specify :state and :status with every call to -step") (exit 6)))))) (if (args:get-arg "-step") (begin (megatest:step (args:get-arg "-step") (or (args:get-arg "-state")(args:get-arg ":state")) (or (args:get-arg "-status")(args:get-arg ":status")) (args:get-arg "-setlog") (args:get-arg "-m")) ;; (if db (sqlite3:finalize! db)) | > | > | 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 | (rmt:teststep-set-status! run-id test-id step state status (or comment msg) logfile)) (begin (debug:print-error 0 *default-log-port* "You must specify :state and :status with every call to -step") (exit 6)))))) (if (args:get-arg "-step") (begin (thread-sleep! 1.5) (megatest:step (args:get-arg "-step") (or (args:get-arg "-state")(args:get-arg ":state")) (or (args:get-arg "-status")(args:get-arg ":status")) (args:get-arg "-setlog") (args:get-arg "-m")) ;; (if db (sqlite3:finalize! db)) (set! *didsomething* #t) (thread-sleep! 1.5))) (if (or (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status ;; (not (args:get-arg "-step"))) ;; -setlog may have been processed already in the "-step" previous ;; NEW POLICY - -setlog sets test overall log on every call. (args:get-arg "-set-toplog") (args:get-arg "-test-status") (args:get-arg "-set-values") |
︙ | ︙ |