Changes In Branch split-run-tests Excluding Merge-Ins
This is equivalent to a diff from 82c6f56ccd to 71615efe0a
2013-06-23
| ||
19:45 | Bringing runcontrol back on to development, step one Closed-Leaf check-in: 7b55df5b3d user: matt tags: runcontrol | |
2013-05-02
| ||
16:42 | merged some refactoring back to the v1.54 branch check-in: c4ec0f7362 user: mrwellan tags: v1.54 | |
2013-05-01
| ||
09:39 | Split run queue processing into classic and new and added control in setup section. Default is classic unless runqueue is set to a number check-in: 609ed2d8d2 user: mrwellan tags: refactor | |
09:00 | Splitting off run-tests-queue to included file Closed-Leaf check-in: 71615efe0a user: mrwellan tags: split-run-tests | |
2013-04-30
| ||
07:48 | Partially working launch on more than 100 registered tests check-in: 6fd8d3ef4d user: matt tags: refactor | |
2013-04-29
| ||
23:40 | Cleanup/minor refactor, removed unneeded delays check-in: 82c6f56ccd user: matt tags: v1.54 | |
20:22 | Added safety net to test control panel for when db disappears via deletion check-in: 2b48e9b190 user: matt tags: v1.54 | |
Added run-tests-queue-classic.scm version [fa6ef4a36c].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue-classic run-id runname test-records keyvallst flags test-patts) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registery (make-hash-table)) (registery-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1)))) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (if (not (null? sorted-test-names)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reruns '())) (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (string->symbol m) 'normal))) (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) (newtal (append tal (list hed)))) (debug:print 6 "test-name: " test-name "\n hed: " hed "\n itemdat: " itemdat "\n items: " items "\n item-path: " item-path "\n waitons: " waitons "\n num-retries: " num-retries "\n tal: " tal "\n reruns: " reruns) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (member test-name waitons) (begin (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (let* ((run-limits-info (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) (prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) ", ") " fails: " fails) (debug:print-info 4 "hed=" hed "\n test-record=" test-record "\n test-name: " test-name "\n item-path: " item-path "\n test-patts: " test-patts) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 "run-limits-info = " run-limits-info) (cond ;; INNER COND #1 for a launchable test ;; Check item path against item-patts ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) ;; Registery has been started for this test but has not yet completed ;; this should be rare, the case where there are only a couple of tests and the db is slow ;; delay a short while and continue ;; ((eq? (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f) 'start) ;; (thread-sleep! 0.01) ;; (loop (car newtal)(cdr newtal) reruns)) ;; count number of 'done, if more than 100 then skip on through. (;; (and (< (length (filter (lambda (x)(eq? x 'done))(hash-table-values test-registery))) 100) ;; why get more than 200 ahead? (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; NEED TO THREADIFY THIS (let ((th (make-thread (lambda () (mutex-lock! registery-mutex) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'start) (mutex-unlock! registery-mutex) ;; If haven't done it before register a top level test if this is an itemized test (if (not (eq? (hash-table-ref/default test-registery (runs:make-full-test-name test-name "") #f) 'done)) (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) (mutex-lock! registery-mutex) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registery-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) ;; TRY (thread-sleep! *global-delta*) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (loop (car newtal)(cdr newtal) reruns)) ;; At this point *all* test registrations must be completed. ((not (null? (filter (lambda (x)(eq? 'start x))(hash-table-values test-registery)))) (debug:print-info 0 "Waiting on test registrations: " (string-intersperse (filter (lambda (x) (eq? (hash-table-ref/default test-registery x #f) 'start)) (hash-table-keys test-registery)) ", ")) (thread-sleep! 0.1) (loop hed tal reruns)) ((not have-resources) ;; simply try again after waiting a second (debug:print-info 1 "no resources to run new tests, waiting ...") ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. (thread-sleep! 1) ;; (+ 2 *global-delta*)) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (loop (car newtal)(cdr newtal) reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id runname keyvallst test-record flags #f) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) (else ;; must be we have unmet prerequisites (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. (if (null? fails) (begin ;; couldn't run, take a breather (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient ;; we made new tal by sticking hed at the back of the list (loop (car newtal)(cdr newtal) reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'removed) (loop (car tal)(cdr tal) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop hed tal reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) (> (length items) 0) (> (length (car items)) 0)) (pp items)) (for-each (lambda (my-itemdat) (let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat))) (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! (let ((newtestname (runs:make-full-test-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath items) (if (not (null? tal)) (begin (debug:print-info 4 "End of items list, looping with next after short delay") ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (runs:can-run-more-tests test-record max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed) "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode "\n num-retries: " num-retries "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n can-run-more: " can-run-more) ;; (thread-sleep! (+ 0.01 *global-delta*)) (cond ;; INNER COND #2 ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch (and (eq? testmode 'toplevel) (null? non-completed))) (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (set-megatest-env-vars run-id) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) ;; (thread-sleep! *global-delta*) (loop hed tal reruns)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") ;; only increment num-retries when there are no tests runing (if (eq? 0 (list-ref can-run-more 1)) (begin ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost ;; TRY (thread-sleep! (+ 2 *global-delta*)) ;; TRY (thread-sleep! (+ 0.01 *global-delta*))) (set! num-retries (+ num-retries 1)))) (if (> num-retries max-retries) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns)) (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? ((and (not (null? fails))(eq? testmode 'normal)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (if (not (null? tal)) (begin ;; (thread-sleep! *global-delta*) (loop (car tal)(cdr tal)(cons hed reruns))))) (else (debug:print 8 "ERROR: No handler for this condition.") ;; TRY (thread-sleep! (+ 1 *global-delta*)) (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE ;; if can't run more just loop with next possible test (begin (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) ;; (thread-sleep! (+ 2 *global-delta*)) (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 *global-delta*)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) ((not (null? tal)) (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) (else (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) )))) ;; LET* ((test-record ;; we get here on "drop through" - loop for next test in queue ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!! (debug:print-info 1 "All tests launched") (thread-sleep! 0.5) ;; FIXME! This harsh exit should not be necessary.... ;; (if (not *runremote*)(exit)) ;; #f)) ;; return a #f as a hint that we are done ;; Here we need to check that all the tests remaining to be run are eligible to run ;; and are not blocked by failed |
Modified runs.scm from [2a43a8e327] to [151e77d8dc].
︙ | ︙ | |||
319 320 321 322 323 324 325 | (conc t) (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) (define (runs:make-full-test-name testname itempath) (if (equal? itempath "") testname (conc testname "/" itempath))) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 | (conc t) (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) (define (runs:make-full-test-name testname itempath) (if (equal? itempath "") testname (conc testname "/" itempath))) (include "run-tests-queue-classic.scm") ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step (define (run:test run-id runname keyvallst test-record flags parent-test) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (test-conf (tests:testqueue-get-testconfig test-record)) |
︙ | ︙ |