Overview
Comment: | Cleanup/minor refactor, removed unneeded delays |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.54 |
Files: | files | file ages | folders |
SHA1: |
82c6f56ccd205d021eafd78a039a7335 |
User & Date: | matt on 2013-04-29 23:40:07 |
Other Links: | branch diff | manifest | tags |
Context
2013-06-23
| ||
19:45 | Bringing runcontrol back on to development, step one Closed-Leaf check-in: 7b55df5b3d user: matt tags: runcontrol | |
2013-05-02
| ||
16:42 | merged some refactoring back to the v1.54 branch check-in: c4ec0f7362 user: mrwellan tags: v1.54 | |
2013-05-01
| ||
09:00 | Splitting off run-tests-queue to included file Closed-Leaf check-in: 71615efe0a user: mrwellan tags: split-run-tests | |
2013-04-30
| ||
07:48 | Partially working launch on more than 100 registered tests check-in: 6fd8d3ef4d user: matt tags: refactor | |
2013-04-29
| ||
23:40 | Cleanup/minor refactor, removed unneeded delays check-in: 82c6f56ccd user: matt tags: v1.54 | |
20:22 | Added safety net to test control panel for when db disappears via deletion check-in: 2b48e9b190 user: matt tags: v1.54 | |
Changes
Modified db.scm from [939db76f6c] to [03085d1877].
︙ | ︙ | |||
1947 1948 1949 1950 1951 1952 1953 | ;;====================================================================== ;; the new prereqs calculation, looks also at itempath if specified ;; all prereqs must be met: ;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met ;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met ;; | < | | | 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 | ;;====================================================================== ;; the new prereqs calculation, looks also at itempath if specified ;; all prereqs must be met: ;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met ;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met ;; ;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) ;; mode 'toplevel means that tests must be COMPLETED only ;; mode 'itemmatch means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] ;; (define (db:get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal)) (if (or (not waitons) (null? waitons)) '() (let* ((unmet-pre-reqs '()) (result '())) (for-each (lambda (waitontest-name) ;; by getting the tests with matching name we are looking only at the matching test ;; and related sub items (let ((tests (cdb:remote-run db:get-tests-for-run #f run-id waitontest-name '() '())) (ever-seen #f) (parent-waiton-met #f) (item-waiton-met #f)) (for-each (lambda (test) ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by defintion this had better be true ... (let* ((state (db:test-get-state test)) |
︙ | ︙ |
Modified runs.scm from [e0752394d2] to [2a43a8e327].
︙ | ︙ | |||
116 117 118 119 120 121 122 | (define (set-item-env-vars itemdat) (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) (define *last-num-running-tests* 0) | > > > | | | | | > > < < < < < < < < | | < < < | 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | (define (set-item-env-vars itemdat) (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) (define *last-num-running-tests* 0) ;; Every time can-run-more-tests is called increment the delay ;; if the cou (define *runs:can-run-more-tests-count* 0) (define (runs:shrink-can-run-more-tests-count) (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) (define (runs:can-run-more-tests test-record max-concurrent-jobs) (thread-sleep! (cond ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "requirements" "jobgroup")) (num-running (cdb:remote-run db:get-count-tests-running #f)) (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) (if (not (eq? *last-num-running-tests* num-running)) (begin (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! *last-num-running-tests* num-running))) (if (not (eq? 0 *globalexitstatus*)) (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) (let ((can-not-run-more (cond |
︙ | ︙ | |||
334 335 336 337 338 339 340 | ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registery (make-hash-table)) (registery-mutex (make-mutex)) (num-retries 0) | | > > > > | 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registery (make-hash-table)) (registery-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1)))) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (if (not (null? sorted-test-names)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reruns '())) (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) |
︙ | ︙ | |||
374 375 376 377 378 379 380 | (if (member test-name waitons) (begin (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) | | | | 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 | (if (member test-name waitons) (begin (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (let* ((run-limits-info (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) (prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) |
︙ | ︙ | |||
428 429 430 431 432 433 434 | (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) (mutex-lock! registery-mutex) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registery-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) | | | | 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 | (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) (mutex-lock! registery-mutex) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registery-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) ;; TRY (thread-sleep! *global-delta*) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (loop (car newtal)(cdr newtal) reruns)) ;; At this point *all* test registrations must be completed. ((not (null? (filter (lambda (x)(eq? 'start x))(hash-table-values test-registery)))) (debug:print-info 0 "Waiting on test registrations: " (string-intersperse (filter (lambda (x) (eq? (hash-table-ref/default test-registery x #f) 'start)) (hash-table-keys test-registery)) |
︙ | ︙ | |||
453 454 455 456 457 458 459 | (loop (car newtal)(cdr newtal) reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id runname keyvallst test-record flags #f) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'running) | | | | | 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 | (loop (car newtal)(cdr newtal) reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id runname keyvallst test-record flags #f) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) (else ;; must be we have unmet prerequisites (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. (if (null? fails) (begin ;; couldn't run, take a breather (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient ;; we made new tal by sticking hed at the back of the list (loop (car newtal)(cdr newtal) reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'removed) (loop (car tal)(cdr tal) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop hed tal reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) |
︙ | ︙ | |||
514 515 516 517 518 519 520 | (debug:print-info 4 "End of items list, looping with next after short delay") ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) | | | | 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 | (debug:print-info 4 "End of items list, looping with next after short delay") ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (runs:can-run-more-tests test-record max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed) "\n fails: " (runs:pretty-string fails) |
︙ | ︙ | |||
556 557 558 559 560 561 562 | (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") ;; only increment num-retries when there are no tests runing (if (eq? 0 (list-ref can-run-more 1)) (begin | | | | | | 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 | (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") ;; only increment num-retries when there are no tests runing (if (eq? 0 (list-ref can-run-more 1)) (begin ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost ;; TRY (thread-sleep! (+ 2 *global-delta*)) ;; TRY (thread-sleep! (+ 0.01 *global-delta*))) (set! num-retries (+ num-retries 1)))) (if (> num-retries max-retries) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns)) (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? ((and (not (null? fails))(eq? testmode 'normal)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (if (not (null? tal)) (begin ;; (thread-sleep! *global-delta*) (loop (car tal)(cdr tal)(cons hed reruns))))) (else (debug:print 8 "ERROR: No handler for this condition.") ;; TRY (thread-sleep! (+ 1 *global-delta*)) (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE ;; if can't run more just loop with next possible test (begin (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) ;; (thread-sleep! (+ 2 *global-delta*)) (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) |
︙ | ︙ |
Modified tests/fullrun/megatest.config from [79a26d5a60] to [367622d994].
︙ | ︙ | |||
63 64 65 66 67 68 69 | # If the server can't be started on this port it will try the next port until # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours | | | 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 | # If the server can't be started on this port it will try the next port until # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours timeout 0.025 ## disks are: ## name host:/path/to/area ## -or- ## name /path/to/area [disks] disk0 /foobarbazz |
︙ | ︙ |