Overview
Comment: | Tweaked delays to reduce db load. Shrunk maxretries to match |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | v1.507 |
Files: | files | file ages | folders |
SHA1: |
da9a8edadbcab047c17cc513e1e0e381 |
User & Date: | mrwellan on 2012-10-17 20:11:28 |
Other Links: | manifest | tags |
Context
2012-10-18
| ||
16:39 | Cherry picked the test matching (test vs. test/) fix to trunk check-in: 517fde8bdf user: mrwellan tags: trunk, v1.508 | |
14:56 | Added fine grained control over debug:print printing and added speciallized printer debug:print-info check-in: b1669bc3d1 user: mrwellan tags: enhanced-debug | |
2012-10-17
| ||
20:11 | Tweaked delays to reduce db load. Shrunk maxretries to match check-in: da9a8edadb user: mrwellan tags: trunk, v1.507 | |
15:21 | Another tweak to how -itempatt works with new matching. check-in: 273fe5758e user: mrwellan tags: trunk, v1.505 | |
Changes
Modified db.scm from [42a639cdea] to [a4f8dba59f].
︙ | ︙ | |||
56 57 58 59 60 61 62 | (define (open-db) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) (let* ((dbpath (conc *toppath* "/megatest.db")) ;; fname) (dbexists (file-exists? dbpath)) (db (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) | | | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | (define (open-db) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) (let* ((dbpath (conc *toppath* "/megatest.db")) ;; fname) (dbexists (file-exists? dbpath)) (db (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) 136000)))) ;; 136000))) ;; 136000 = 2.2 minutes (debug:print 4 "INFO: dbpath=" dbpath) (sqlite3:set-busy-handler! db handler) (if (not dbexists) (db:initialize db)) (db:set-sync db) db)) |
︙ | ︙ |
Modified runs.scm from [b4bca3f412] to [8169338d90].
︙ | ︙ | |||
407 408 409 410 411 412 413 | (and max-concurrent-jobs (> (- max-concurrent-jobs num-running) 5))) (debug:print 4 "INFO: Pre-registering test " test-name "/" item-path " to create placeholder" ) (open-run-close db:tests-register-test #f run-id test-name item-path) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t) (thread-sleep! *global-delta*) (loop (car newtal)(cdr newtal) reruns)) ((not have-resources) ;; simply try again after waiting a second | < > | | | 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 | (and max-concurrent-jobs (> (- max-concurrent-jobs num-running) 5))) (debug:print 4 "INFO: Pre-registering test " test-name "/" item-path " to create placeholder" ) (open-run-close db:tests-register-test #f run-id test-name item-path) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t) (thread-sleep! *global-delta*) (loop (car newtal)(cdr newtal) reruns)) ((not have-resources) ;; simply try again after waiting a second (debug:print 1 "INFO: no resources to run new tests, waiting ...") (thread-sleep! (+ 0.01 *global-delta*)) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (loop (car newtal)(cdr newtal) reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id runname keyvallst test-record flags #f) (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) (else ;; must be we have unmet prerequisites (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. (if (null? fails) (begin ;; couldn't run, take a breather (debug:print 4 "INFO: Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient ;; we made new tal by sticking hed at the back of the list (loop (car newtal)(cdr newtal) reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") (thread-sleep! *global-delta*) (loop (car tal)(cdr tal) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (thread-sleep! (+ 0.01 *global-delta*)) (loop hed tal reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (if (and (>= *verbosity* 1) (> (length items) 0) |
︙ | ︙ | |||
465 466 467 468 469 470 471 | (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath items) (if (not (null? tal)) (begin | < | > | 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 | (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath items) (if (not (null? tal)) (begin (debug:print 4 "INFO: End of items list, looping with next after short delay") (thread-sleep! (+ 0.01 *global-delta*)) (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (open-run-close runs:can-run-more-tests #f test-record))) (if can-run-more |
︙ | ︙ | |||
489 490 491 492 493 494 495 | "\n testmode: " testmode "\n num-retries: " num-retries "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n can-run-more: " can-run-more) | | < > > > > | | | | 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 | "\n testmode: " testmode "\n num-retries: " num-retries "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n can-run-more: " can-run-more) ;; (thread-sleep! (+ 0.01 *global-delta*)) (cond ;; INNER COND #2 ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch (and (eq? testmode 'toplevel) (null? non-completed))) (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (open-run-close set-megatest-env-vars #f run-id) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) (thread-sleep! *global-delta*) (loop hed tal reruns)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) (debug:print 4 "INFO: fails is null, moving on in the queue but keeping " hed " for now") ;; only increment num-retries when there are no tests runing (if (eq? 0 (list-ref can-run-more 1)) (begin (if (> num-retries 100) ;; first 100 retries are low time cost (thread-sleep! (+ 2 *global-delta*)) (thread-sleep! (+ 0.01 *global-delta*))) (set! num-retries (+ num-retries 1)))) (if (> num-retries max-retries) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns)) (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? ((and (not (null? fails))(eq? testmode 'normal)) (debug:print 1 "INFO: test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (if (not (null? tal)) (begin (thread-sleep! *global-delta*) (loop (car tal)(cdr tal)(cons hed reruns))))) (else (debug:print 8 "ERROR: No handler for this condition.") (thread-sleep! (+ 1 *global-delta*)) (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE ;; if can't run more just loop with next possible test (begin (debug:print 4 "INFO: processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) (thread-sleep! (+ 1 *global-delta*)) (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (let* ((newlst (open-run-close tests:filter-non-runnable #f run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print 4 "INFO: full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) (thread-sleep! (+ 1 *global-delta*)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) ((not (null? tal)) (debug:print 4 "INFO: I'm pretty sure I shouldn't get here.")) (else (debug:print 4 "INFO: Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) |
︙ | ︙ |
Modified tests/fullrun/megatest.config from [359ec4dac2] to [5d94f21968].
︙ | ︙ | |||
13 14 15 16 17 18 19 | [setup] # FULL or 2, NORMAL or 1, OFF or 0 synchronous OFF # Throttle roughly scales the db access milliseconds to seconds delay throttle 0.2 # Max retries allows megatest to re-check that a tests status has changed # as tests can have transient FAIL status occasionally | | | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | [setup] # FULL or 2, NORMAL or 1, OFF or 0 synchronous OFF # Throttle roughly scales the db access milliseconds to seconds delay throttle 0.2 # Max retries allows megatest to re-check that a tests status has changed # as tests can have transient FAIL status occasionally maxretries 20 [validvalues] state start end status pass fail n/a 0 1 running # These are set before all tests, override them # in the testconfig [pre-launch-env-overrides] section |
︙ | ︙ |