Overview
Comment: | parallelized removal of subruns |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | 1.65-subrun-ancilliary-usecases |
Files: | files | file ages | folders |
SHA1: |
a035fad97b7ad66a7d566285045b6cc1 |
User & Date: | bjbarcla on 2017-12-27 18:12:04 |
Other Links: | branch diff | manifest | tags |
Context
2017-12-27
| ||
19:01 | updated to work with keep-records and updated manual Leaf check-in: 70391eee14 user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
18:12 | parallelized removal of subruns check-in: a035fad97b user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
16:24 | subrun kill works but suboptimal (serial kill) check-in: e010ede9bd user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
Changes
Modified common.scm from [bd48028047] to [ebc2b450b4].
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 | (prefix sqlite3 sqlite3:) pkts ) (declare (unit common)) (include "common_records.scm") ;; (require-library margs) ;; (include "margs.scm") ;; (define old-exit exit) ;; ;; (define (exit . code) | > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | (prefix sqlite3 sqlite3:) pkts ) (declare (unit common)) (include "common_records.scm") ;; (require-library margs) ;; (include "margs.scm") ;; (define old-exit exit) ;; ;; (define (exit . code) |
︙ | ︙ | |||
2626 2627 2628 2629 2630 2631 2632 | ((string? new-val) (setenv env-var new-val))) restore-thunk)) delta-env-alist)))) (let ((rv (thunk))) (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state rv))) | > > > > > > | 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 | ((string? new-val) (setenv env-var new-val))) restore-thunk)) delta-env-alist)))) (let ((rv (thunk))) (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state rv))) (define (common:send-thunk-to-background-thread thunk #!key (name #f)) ;;(BB> "launched thread " name) (if name (thread-start! (make-thread thunk name)) (thread-start! (make-thread thunk)))) |
Modified runs.scm from [089f325036] to [b511145c8a].
︙ | ︙ | |||
2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 | (dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin | > > > | 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 | (dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f)))))) (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests (test-retry-time (make-hash-table)) (backgrounded-remove-status (make-hash-table)) (backgrounded-remove-last-visit (make-hash-table)) (backgrounded-remove-result (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin |
︙ | ︙ | |||
2082 2083 2084 2085 2086 2087 2088 | (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (has-subrun | | > > > > > | > > > > | | > > > > > > > > > > > > > > > > > > > > > | | | > > | | | | | < | | > > > | 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 | (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (has-subrun ;; (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) (now (current-seconds)) (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) (case rem-status ((not-started) (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") (hash-table-set! backgrounded-remove-status test-fulln 'started) (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) (common:send-thunk-to-background-thread (lambda () (let* ((subrun-remove-succeeded (subrun:remove-subrun run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test))) (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) (hash-table-set! backgrounded-remove-status test-fulln 'done))) name: (conc "remove-subrun:"test-fulln)) ;; send to back of line, loop (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))) ) ((started) ;; if last visit was within last second, sleep 1 second (if (< (- now last-visit) 1.0) (thread-sleep! 1.0)) (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) ;; send to back of line, loop (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))) ) ((done) ;; drop this one; if remaining, loop, else finish (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) (cond ((eq? subrun-remove-succeeded 'exception) (let* ((logfile (subrun:get-log-path run-dir "remove"))) (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile))) (subrun-remove-succeeded (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") (runs:remove-test-directory new-test-dat mode)) (else (let* ((logfile (subrun:get-log-path run-dir "remove"))) (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")))) (if (not (null? tal)) (loop (car tal)(cdr tal))))) ) ; end case rem-status ) ; end let ); end cond has-subrun (else (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin |
︙ | ︙ |
Modified subrun.scm from [9da03e90bf] to [3571b59cfa].
︙ | ︙ | |||
75 76 77 78 79 80 81 | (define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test) ;; set state/status of test item ;; fork off megatest ;; set state/status of test item ;; | | | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | (define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test) ;; set state/status of test item ;; fork off megatest ;; set state/status of test item ;; ;;(BB> "Entered subrun:remove-subrun with "test-fulln) (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) (let* ((remove-result (subrun:exec-sub-megatest test-run-dir "-remove-runs" "remove"))) (if remove-result (begin (subrun:set-subrun-removed test-run-dir) #t) |
︙ | ︙ |