Overview
Comment: | subrun kill works but suboptimal (serial kill) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | 1.65-subrun-ancilliary-usecases |
Files: | files | file ages | folders |
SHA1: |
e010ede9bd89034197618e007c786c53 |
User & Date: | bjbarcla on 2017-12-27 16:24:12 |
Other Links: | branch diff | manifest | tags |
Context
2017-12-27
| ||
18:12 | parallelized removal of subruns check-in: a035fad97b user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
16:24 | subrun kill works but suboptimal (serial kill) check-in: e010ede9bd user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
2017-12-26
| ||
18:07 | wip; added hooks for subrun remove-run handling check-in: 05b23944bc user: bjbarcla tags: 1.65-subrun-ancilliary-usecases | |
Changes
Modified Makefile from [79ddb592c4] to [c37f1ed514].
︙ | ︙ | |||
89 90 91 92 93 94 95 96 97 98 99 100 101 102 | rpc-transport.o \ runconfig.o \ runs.o \ server.o \ tasks.o \ tdb.o \ tests.o \ tcmt : $(TCMTOBJS) tcmt.scm csc $(CSCOPTS) $(TCMTOBJS) tcmt.scm -o tcmt # install documentation to $(PREFIX)/docs # DOES NOT REBUILD DOCS | > | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | rpc-transport.o \ runconfig.o \ runs.o \ server.o \ tasks.o \ tdb.o \ tests.o \ subrun.o \ tcmt : $(TCMTOBJS) tcmt.scm csc $(CSCOPTS) $(TCMTOBJS) tcmt.scm -o tcmt # install documentation to $(PREFIX)/docs # DOES NOT REBUILD DOCS |
︙ | ︙ |
Modified runs.scm from [6aafacc0b0] to [089f325036].
︙ | ︙ | |||
2062 2063 2064 2065 2066 2067 2068 | (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path *fdb* ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree | | > > > | > > | > > > > > | > > | | 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 | (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path *fdb* ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (has-subrun (and (subrun:subrun-test-initialized? run-dir) (not (subrun:subrun-removed? run-dir)))) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat)) (uname (db:test-get-uname new-test-dat)) (toplevel-with-children (and (db:test-get-is-toplevel test) (> (rmt:test-toplevel-num-items run-id test-name) 0)))) (case action ((remove-runs) ;; if the test is a toplevel-with-children issue an error and do not remove (cond (toplevel-with-children (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) (if (> (hash-table-ref toplevel-retries test-fulln) 3) (if (not (null? tal)) (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries (let ((newtal (append tal (list test)))) (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue (has-subrun ;; BB TODO - manage toplevasel-retries hash and retries in general (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") (let* ((subrun-remove-succeeded (subrun:remove-subrun run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test))) (cond (subrun-remove-succeeded (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " as it has a subrun") (runs:remove-test-directory new-test-dat mode)) (else (let* ((logfile (subrun:get-log-path run-dir "remove"))) (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")))) (if (not (null? tal)) (loop (car tal)(cdr tal))))) (else (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? |
︙ | ︙ |
Modified subrun.scm from [4e6779a4e5] to [9da03e90bf].
1 2 3 4 5 6 7 8 9 10 11 12 13 | ;; Copyright 2006-2016, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) | | > | | | > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | ;; Copyright 2006-2016, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format call-with-environment-variables) (declare (unit subrun)) ;;(declare (uses runs)) (declare (uses db)) (declare (uses common)) ;;(declare (uses items)) ;;(declare (uses runconfig)) ;;(declare (uses tests)) ;;(declare (uses server)) (declare (uses mt)) ;;(declare (uses archive)) ;; (declare (uses filedb)) ;(include "common_records.scm") ;;(include "key_records.scm") (include "db_records.scm") ;; provides db:test-get-id ;;(include "run_records.scm") ;;(include "test_records.scm") (define (subrun:subrun-test-initialized? test-run-dir) (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) #t #f)) (define (subrun:subrun-removed? test-run-dir) (if (subrun:subrun-test-initialized? test-run-dir) (let ((flagfile (conc test-run-dir "/subrun.removed"))) (if (common:file-exists? flagfile) #t #f)) #t)) (define (subrun:set-subrun-removed test-run-dir) (let ((flagfile (conc test-run-dir "/subrun.removed"))) (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile))) (with-output-to-file flagfile (lambda () (print (current-seconds))))))) (define (subrun:testconfig-defines-subrun? testconfig) (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested (define (subrun:initialize-toprun-test testconfig test-run-dir) (let ((ra (configf:lookup testconfig "subrun" "run-area")) (logpro (configf:lookup testconfig "subrun" "logpro")) |
︙ | ︙ | |||
60 61 62 63 64 65 66 | (define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test) ;; set state/status of test item ;; fork off megatest ;; set state/status of test item ;; | | | < < < < < < < | | < | < < | < | > | | < | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | (define (subrun:remove-subrun test-run-dir new-test-dat test-name item-path test-state test-fulln toplevel-with-children test) ;; set state/status of test item ;; fork off megatest ;; set state/status of test item ;; (BB> "Entered subrun:remove-subrun with "test-fulln) (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) (let* ((remove-result (subrun:exec-sub-megatest test-run-dir "-remove-runs" "remove"))) (if remove-result (begin (subrun:set-subrun-removed test-run-dir) #t) #f)) #t)) (define (subrun:launch-cmd test-run-dir) (let* ((log-prefix "run") (switches (subrun:selector+log-switches test-run-dir log-prefix)) (run-wait #t) (cmd (conc "megatest -run "switches" " (if run-wait "-run-wait " "")))) |
︙ | ︙ | |||
167 168 169 170 171 172 173 | (map (lambda (x) (list (car x) (cdr x))) switch-alist)) " "))) res)) | | < < < | | > > > | > | | > > > > > > > > > > > | | > | 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | (map (lambda (x) (list (car x) (cdr x))) switch-alist)) " "))) res)) (define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) (cmd (conc "megatest " selector-switches " " action-switches-str )) (pid #f) (proc (lambda () (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) (call-with-environment-variables (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) (lambda () (common:without-vars proc "^MT_.*"))) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (if (eq? pid-val 0) (begin (thread-sleep! 2) (processloop (+ i 1))) (begin (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) (if (eq? 0 exit-code) (begin #t) (begin #f)))))))) ;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") |