Overview
Comment: | wip; added hooks for subrun remove-run handling |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | 1.65-subrun-ancilliary-usecases |
Files: | files | file ages | folders |
SHA1: | 05b23944bc99acfed408faa1fb9e847a |
User & Date: | bjbarcla on 2017-12-26 18:07:07 |
Other Links: | branch diff | manifest | tags |
Context
2017-12-27
| ||
16:24 | subrun kill works but suboptimal (serial kill) check-in: e010ede9bd user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
2017-12-26
| ||
18:07 | wip; added hooks for subrun remove-run handling check-in: 05b23944bc user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
16:14 | restored launch functionality using subrun under updated subrun architecture check-in: b1363320bf user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
Changes
Modified runs.scm from [6620831073] to [6aafacc0b0].
︙ | ︙ | |||
2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 | (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path *fdb* ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat)) (uname (db:test-get-uname new-test-dat)) (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (cond (toplevel-with-children (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (else (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? | > > > > > > > | 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 | (if (not (null? 
tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path *fdb* ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (has-subrun (subrun:subrun-test-initialized? run-dir)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat)) (uname (db:test-get-uname new-test-dat)) (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (cond (toplevel-with-children (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (has-subrun ;; BB TODO - manage toplevasel-retries hash and retries in general (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test) ) (else (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? |
︙ | ︙ | |||
2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 | (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? tal)) (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((set-state-status) (debug:print-info 2 *default-log-port* "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) (if (common:file-exists? ddir) (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal)) | > > > | 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 | (loop (car tal)(append tal (list new-test-dat))))) (begin (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) (if (not (null? 
tal)) (loop (car tal)(cdr tal))))))) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ((set-state-status) ;; BB TODO - manage has-subrun case (debug:print-info 2 *default-log-port* "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) ;; BB TODO - manage has-subrun case (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests))))) ((archive) ;; BB TODO - manage has-subrun case (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) (if (common:file-exists? ddir) (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal)) |
︙ | ︙ |
Modified subrun.scm from [5fbd7e058e] to [4e6779a4e5].
︙ | ︙ | |||
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | ;(include "common_records.scm") ;;(include "key_records.scm") ;;(include "db_records.scm") ;;(include "run_records.scm") ;;(include "test_records.scm") (define (subrun:initialize-toprun-test testconfig test-run-dir) (let ((ra (configf:lookup testconfig "subrun" "run-area")) (logpro (configf:lookup testconfig "subrun" "logpro")) (symlink-target (conc test-run-dir "/subrun-area")) ) (when (not ra) ;; when runarea is not set we default to *toppath*. However ;; we need to force the setting in the testconfig so it will ;; be preserved in the testconfig.subrun file (configf:set-section-var testconfig "subrun" "runarea" *toppath*)) (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun (if (common:file-exists? symlink-target) (delete-file symlink-target)) (create-symbolic-link ra symlink-target) (configf:write-alist testconfig "testconfig.subrun"))) (define (subrun:launch-cmd test-run-dir) (let* ((log-prefix "run") (switches (subrun:selector+log-switches test-run-dir log-prefix)) (run-wait #t) (cmd (conc "megatest -run "switches" " (if run-wait "-run-wait " "")))) cmd)) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < < | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | ;(include "common_records.scm") ;;(include "key_records.scm") ;;(include "db_records.scm") ;;(include "run_records.scm") ;;(include "test_records.scm") (define (subrun:subrun-test-initialized? test-run-dir) (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) #t #f)) (define (subrun:testconfig-defines-subrun? 
testconfig) (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested (define (subrun:initialize-toprun-test testconfig test-run-dir) (let ((ra (configf:lookup testconfig "subrun" "run-area")) (logpro (configf:lookup testconfig "subrun" "logpro")) (symlink-target (conc test-run-dir "/subrun-area")) ) (when (not ra) ;; when runarea is not set we default to *toppath*. However ;; we need to force the setting in the testconfig so it will ;; be preserved in the testconfig.subrun file (configf:set-section-var testconfig "subrun" "runarea" *toppath*)) (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun (if (common:file-exists? symlink-target) (delete-file symlink-target)) (create-symbolic-link ra symlink-target) (configf:write-alist testconfig "testconfig.subrun"))) (define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test) ;; set state/status of test item ;; fork off megatest ;; set state/status of test item ;; (let* ((subrun-alist (subrun:selector+log-alist test-run-dir log-prefix)) (runlog (alist-ref "-log" subrun-alist equal? #f))) (if (not (common:file-exists? runlog)) (BB> "no runlog @ "runlog) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. 
;; call in submegatest: ;; (tasks:kill-runner target run-name testpatt) (mt:test-set-state-status-by-id run-id (db:test-get-id test) "SUBRUN-KILLREQ" "n/a" #f) ) ;; on success: ;; set state of test, or delete it or whatever ) ) ) (define (subrun:launch-cmd test-run-dir) (let* ((log-prefix "run") (switches (subrun:selector+log-switches test-run-dir log-prefix)) (run-wait #t) (cmd (conc "megatest -run "switches" " (if run-wait "-run-wait " "")))) cmd)) (define (subrun:selector+log-alist test-run-dir log-prefix) (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) (subrunfile (conc test-run-dir "/testconfig.subrun" )) (subrundata (with-input-from-file subrunfile read)) (subrunconfig (configf:alist->config subrundata)) (run-area (configf:lookup subrunconfig "subrun" "run-area")) (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf (get-environment-variable "MT_RUN_AREA_HOME") |
︙ | ︙ | |||
115 116 117 118 119 120 121 122 123 | (switch-alist (cons (cons "-log" logfile) (map (lambda (item) (if (equal? (car item) "-testpatt") (cons "-testpatt" testpatt) item)) switch-alist-pre)))) ;; note - get precmd from subrun section ;; apply to submegatest commands | > > > > > > > > > | | | | | | | | | | < | 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | (switch-alist (cons (cons "-log" logfile) (map (lambda (item) (if (equal? (car item) "-testpatt") (cons "-testpatt" testpatt) item)) switch-alist-pre)))) switch-alist)) ;; note - get precmd from subrun section ;; apply to submegatest commands (define (subrun:get-log-path test-run-dir log-prefix) (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) (res (alist-ref "-log" alist equal? #f))) res)) (define (subrun:selector+log-switches test-run-dir log-prefix) (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) (res (string-intersperse (apply append (map (lambda (x) (list (car x) (cdr x))) switch-alist)) " "))) res)) (define (subrun:exec-sub-megatest test-run-dir switches #!key (logfile #f)) (let* ((real-logfile (or logfile (conc (test-run-dir) "/subrun-" (string-substitute "[/*]" "_" (string-intersperse switches "^"))"-" (number->string (current-seconds)) ".log"))) (selector-switches (common:sub-megatest-selector-switches test-run-dir)) (cmd-list `("megatest" ,@selector-switches ,@switches "-log" ,real-logfile)) |
︙ | ︙ |