Overview
Comment: | Re-arranged shrink can-run-more-tests to mostly run locally with a couple calls to cdb:runremote. This did not fix the stuck start on single dependent test |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | dev |
Files: | files | file ages | folders |
SHA1: |
2a574677b3ae2d21c4491c3181fdeed2 |
User & Date: | matt on 2013-06-24 00:52:50 |
Other Links: | branch diff | manifest | tags |
Context
2013-06-25
| ||
01:16 | Blocked test launching fixed. Problems remain check-in: 08d644b7d1 user: mrwellan tags: dev | |
2013-06-24
| ||
00:52 | Re-arranged shrink can-run-more-tests to mostly run locally with a couple calls to cdb:runremote. This did not fix the stuck start on single dependent test check-in: 2a574677b3 user: matt tags: dev | |
2013-06-23
| ||
21:55 | Merged runcontrol into develoment. Last (I hope) step check-in: af74fe9e05 user: matt tags: dev | |
Changes
Modified runs.scm from [5e2ad93678] to [9afe7aba7c].
︙ | ︙ | |||
130 131 132 133 134 135 136 | ;; Every time can-run-more-tests is called increment the delay ;; ;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine ;; (define *last-num-running-tests* 0) (define *runs:can-run-more-tests-count* 0) | | | | | | 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | ;; Every time can-run-more-tests is called increment the delay ;; ;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine ;; (define *last-num-running-tests* 0) (define *runs:can-run-more-tests-count* 0) (define (runs:shrink-can-run-more-tests-count) ;; the db is a dummy var so we can use cdb:remote-run (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) (define (runs:can-run-more-tests jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (cdb:remote-run db:get-count-tests-running #f)) (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) (if (not (eq? *last-num-running-tests* num-running)) (begin (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! *last-num-running-tests* num-running))) |
︙ | ︙ | |||
379 380 381 382 383 384 385 | (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) | | | 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 | (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (let* ((run-limits-info (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running ;; (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) (prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode)) |
︙ | ︙ | |||
435 436 437 438 439 440 441 | (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) | | | 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 | (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (loop hed tal reg reruns) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (let ((newl (append reg (list hed)))) (if regfull (cdr newl) |
︙ | ︙ | |||
468 469 470 471 472 473 474 | (loop (car newtal)(cdr newtal) reg reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id run-info keyvals runname test-record flags #f) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) | | | 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 | (loop (car newtal)(cdr newtal) reg reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id run-info keyvals runname test-record flags #f) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns))) (else ;; must be we have unmet prerequisites |
︙ | ︙ | |||
492 493 494 495 496 497 498 | (loop (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") | | | | 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 | (loop (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop hed tal reg reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) |
︙ | ︙ | |||
538 539 540 541 542 543 544 | (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) | | | 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 | (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (runs:can-run-more-tests jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let* ((prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "can-run-more: " can-run-more "\n testname: " hed |
︙ | ︙ |
Modified tests.scm from [db31a4d61c] to [2e17c6b887].
1 | ;;====================================================================== | | | 1 2 3 4 5 6 7 8 9 | ;;====================================================================== ;; Copyright 2006-2013, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. |
︙ | ︙ |
Modified tests/Makefile from [b15db3182f] to [0259ad9857].
︙ | ︙ | |||
81 82 83 84 85 86 87 | $(MEGATEST) -remove-runs -target a/b :runname c -testpatt % ; \ $(MEGATEST) -runtests % -target a/d :runname c;$(MEGATEST) -list-runs %|egrep ^Run:) > test7.log 2> test7.log logpro test7.logpro test7.html < test7.log @echo @echo Run \"firefox test7.html\" to see the results. # This one failed with v1.55 | | > > | 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | $(MEGATEST) -remove-runs -target a/b :runname c -testpatt % ; \ $(MEGATEST) -runtests % -target a/d :runname c;$(MEGATEST) -list-runs %|egrep ^Run:) > test7.log 2> test7.log logpro test7.logpro test7.html < test7.log @echo @echo Run \"firefox test7.html\" to see the results. # This one failed with v1.55 test8a : cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target ubuntu/nfs/none :runname $(RUNNAME)_waiton_single test8 : test8a cd fullrun;$(MEGATEST) -runtests lineitem_fail 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singletest cd fullrun;$(MEGATEST) -runtests runfirst/fall 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem cd fullrun;$(MEGATEST) -runtests test_mt_vars/2 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem_waiton cleanprep : ../*.scm Makefile */*.config mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links cd ..;make;make install |
︙ | ︙ |