Overview
Comment: | Some cleanup on the run-away open files fix |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.70 |
Files: | files | file ages | folders |
SHA1: |
505cde89e4310cd5871c28bbc24bc040 |
User & Date: | matt on 2022-11-04 02:12:41 |
Other Links: | branch diff | manifest | tags |
Context
2022-11-05
| ||
21:04 | Removed or updated prints to get bare-prints passing in ext-tests. check-in: 429f76ae7e user: matt tags: v1.70 | |
2022-11-04
| ||
12:09 | Changed megatest version to 1.7009 check-in: b9e8eb19c3 user: mmgraham tags: v1.70, v1.7009 | |
02:12 | Some cleanup on the run-away open files fix check-in: 505cde89e4 user: matt tags: v1.70 | |
2022-11-03
| ||
20:36 | Possible fixes to run-away open files check-in: 38e7dc2f46 user: matt tags: v1.70 | |
Changes
Modified common.scm from [3b2eb62756] to [8364d1a2fe].
︙ | ︙ | |||
1803 1804 1805 1806 1807 1808 1809 | (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) '(-99 -99 -99)) (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) (or (common:get-cached-info actual-hostname "cpu-load") (let ((result (if remote-host (map (lambda (res) (if (eof-object? res) 9e99 res)) | < | < | 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 | (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) '(-99 -99 -99)) (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) (or (common:get-cached-info actual-hostname "cpu-load") (let ((result (if remote-host (map (lambda (res) (if (eof-object? res) 9e99 res)) (common:raw-get-remote-host-load remote-host)) (with-input-from-file "/proc/loadavg" (lambda ()(list (read)(read)(read))))))) (match result ((l1 l2 l3) (if (and (number? l1) (number? l2) |
︙ | ︙ | |||
1848 1849 1850 1851 1852 1853 1854 | ((eq? res #f) default) ;; this would be the #eof (else default)))) (define (common:get-normalized-cpu-load-raw remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost (or (common:get-cached-info actual-host "normalized-load") (let ((data (if remote-host | > > > > | > | | > > | 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 | ((eq? res #f) default) ;; this would be the #eof (else default)))) (define (common:get-normalized-cpu-load-raw remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost (or (common:get-cached-info actual-host "normalized-load") (let ((data (if remote-host (let ((inp #f)) (handle-exceptions exn (begin (close-input-port inp) '()) (set! inp (open-input-port (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\""))) (let* ((res (read-lines inp))) (close-input-port inp) res))) (append (with-input-from-file "/proc/loadavg" read-lines) (with-input-from-file "/proc/cpuinfo" read-lines) (list "end")))) (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) |
︙ | ︙ | |||
2051 2052 2053 2054 2055 2056 2057 | numcpu #f) ;; if zero return #f so caller knows that things are not working (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line)))))) (result (if remote-host | | | > > > > > > > > > > > > > > > > > | 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 | numcpu #f) ;; if zero return #f so caller knows that things are not working (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line)))))) (result (if remote-host (common:generic-ssh (conc "ssh " remote-host " cat /proc/cpuinfo") proc -1) (with-input-from-file "/proc/cpuinfo" proc)))) (if (and (number? result) (> result 0)) (common:write-cached-info actual-host "num-cpus" result)) result)))) (hash-table-set! *numcpus-cache* actual-host numcpus) numcpus)))) (define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f)) (let ((inp #f)) (handle-exceptions exn (begin (close-input-port inp) (if msg-proc (msg-proc) (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn)) default) (set! inp (open-input-pipe ssh-command)) (with-input-from-port inp (lambda () (let ((res (proc))) (close-input-port inp) res)))))) ;;====================================================================== ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5)) (let ((num-cpus (common:get-num-cpus remote-host))) (if num-cpus |
︙ | ︙ |
Modified launch.scm from [c3fcd3bfc9] to [9b744a8e91].
︙ | ︙ | |||
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 | (output (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) #t)) (define (launch:is-test-alive host pid) (let* ((same-host (equal? host (get-host-name))) (cmd (conc (if same-host "" (conc "ssh "host" ")) "pstree -A "pid))) (if (and host pid (not (equal? host "n/a"))) | > > > > > | > | 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 | (output (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) #t)) ;; this is a close duplicate of: ;; process:alist-on-host? ;; process:alive ;; (define (launch:is-test-alive host pid) (let* ((same-host (equal? host (get-host-name))) (cmd (conc (if same-host "" (conc "ssh "host" ")) "pstree -A "pid))) (if (and host pid (not (equal? host "n/a"))) (let* ((output (if same-host (with-input-from-pipe cmd read-lines) (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) #t))) ;; assuming bad query is about a live test is likely not the right thing to do? (define (launch:kill-tests-if-dead run-id) |
︙ | ︙ |
Modified process.scm from [f9dfbe5500] to [63f5286965].
︙ | ︙ | |||
193 194 195 196 197 198 199 | (common:file-exists? (conc "/proc/" pid)) (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) (and (number? rpid) (equal? rpid pid))))) (define (process:alive-on-host? host pid) (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) | > > > | | | | | | | | | | | | | | | > > > > > | 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 | (common:file-exists? (conc "/proc/" pid)) (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) (and (number? rpid) (equal? rpid pid))))) (define (process:alive-on-host? host pid) (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) (common:generic-ssh cmd ;; ;; handle-exceptions ;; exn ;; (begin ;; (debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn) ;; #f) ;; anything goes wrong - assume the process in NOT running. ;; (with-input-from-pipe ;; cmd (lambda () (let loop ((inl (read-line))) (if (eof-object? inl) #f (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl)) (innum (string->number clean-str))) (and innum (eq? pid innum)))))) #f (lambda () (debug:print 0 *default-log-port* "failed to identify if process " pid", on host "host" is alive. exn="exn))))) (define (process:get-sub-pids pid) (with-input-from-pipe (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid) (lambda () (let loop ((inl (read-line)) (res '())) |
︙ | ︙ |