Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.5418) +(define megatest-version 1.5419) ADDED runs-launch-loop-test.scm Index: runs-launch-loop-test.scm ================================================================== --- /dev/null +++ runs-launch-loop-test.scm @@ -0,0 +1,59 @@ +(use srfi-69) + +(define (runs:queue-next-hed tal reg n regful) + (if regful + (car reg) + (car tal))) + +(define (runs:queue-next-tal tal reg n regful) + (if regful + tal + (let ((newtal (cdr tal))) + (if (null? newtal) + reg + newtal + )))) + +(define (runs:queue-next-reg tal reg n regful) + (if regful + (cdr reg) + (if (eq? (length tal) 1) + '() + reg))) + +(use trace) +(trace runs:queue-next-hed + runs:queue-next-tal + runs:queue-next-reg) + + +(define tests '(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20)) + +(define test-registry (make-hash-table)) + +(define n 3) + +(let loop ((hed (car tests)) + (tal (cdr tests)) + (reg '())) + (let* ((reglen (length reg)) + (regful (> reglen n))) + (print "hed=" hed ", length reg=" (length reg) ", (> lenreg n)=" (> (length reg) n)) + (let ((newtal (append tal (list hed)))) ;; used if we are not done with this test + (cond + ((not (hash-table-ref/default test-registry hed #f)) + (hash-table-set! test-registry hed #t) + (print "Registering #" hed) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg n regful) + (runs:queue-next-tal tal reg n regful) + (let ((newl (append reg (list hed)))) + (if regful + (cdr newl) + newl))))) + (else + (print "Running #" hed) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg n regful) + (runs:queue-next-tal tal reg n regful) + (runs:queue-next-reg tal reg n regful)))))))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -320,29 +320,61 @@ (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) (define (runs:make-full-test-name testname itempath) (if (equal? itempath "") testname (conc testname "/" itempath))) + +(define (runs:queue-next-hed tal reg n regful) + (if regful + (if (null? reg) ;; doesn't make sense, this is probably NOT the problem of the car + (car tal) + (car reg)) + (car tal))) + +(define (runs:queue-next-tal tal reg n regful) + (if regful + tal + (let ((newtal (cdr tal))) + (if (null? newtal) + reg + newtal + )))) + +(define (runs:queue-next-reg tal reg n regful) + (if regful + (cdr reg) + (if (eq? (length tal) 1) + '() + reg))) + +;; (use trace) +;; (trace +;; runs:queue-next-hed +;; runs:queue-next-tal +;; runs:queue-next-reg +;; ) ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue run-id runname test-records keyvallst flags test-patts) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) - (test-registery (make-hash-table)) - (registery-mutex (make-mutex)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) - 1)))) + 1))) + (reglen 10)) ;; length of the register queue ahead (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (if (not (null? sorted-test-names)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) + (reg '()) ;; registered, put these at the head of tal (reruns '())) (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) @@ -352,12 +384,18 @@ (waitons (tests:testqueue-get-waitons test-record)) (priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) - (newtal (append tal (list hed)))) - + (newtal (append tal (list hed))) + (regfull (> (length reg) reglen))) + ;; (if (> (length reg) 10) + ;; (begin + ;; (set! tal (cons hed tal)) + ;; (set! hed (car reg)) + ;; (set! reg (cdr reg)) + ;; (set! newtal tal))) (debug:print 6 "test-name: " test-name "\n hed: " hed "\n itemdat: " itemdat "\n items: " items @@ -403,64 +441,76 @@ ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns))) - ;; Registery has been started for this test but has not yet completed + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns))) + ;; Registry has been started for this test but has not yet completed ;; this should be rare, the case where there are only a couple of tests and the db is slow ;; delay a short while and continue - ;; ((eq? (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f) 'start) + ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start) ;; (thread-sleep! 0.01) ;; (loop (car newtal)(cdr newtal) reruns)) ;; count number of 'done, if more than 100 then skip on through. - (;; (and (< (length (filter (lambda (x)(eq? x 'done))(hash-table-values test-registery))) 100) ;; why get more than 200 ahead? - (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. + ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) - ;; NEED TO THREADIFY THIS (let ((th (make-thread (lambda () - (mutex-lock! registery-mutex) - (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'start) - (mutex-unlock! registery-mutex) + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) + (mutex-unlock! registry-mutex) ;; If haven't done it before register a top level test if this is an itemized test - (if (not (eq? (hash-table-ref/default test-registery (runs:make-full-test-name test-name "") #f) 'done)) + (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) - (mutex-lock! registery-mutex) - (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'done) - (mutex-unlock! registery-mutex)) + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) + (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th)) - ;; TRY (thread-sleep! *global-delta*) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) - (loop (car newtal)(cdr newtal) reruns)) - ;; At this point *all* test registrations must be completed. - ((not (null? (filter (lambda (x)(eq? 'start x))(hash-table-values test-registery)))) - (debug:print-info 0 "Waiting on test registrations: " (string-intersperse - (filter (lambda (x) - (eq? (hash-table-ref/default test-registery x #f) 'start)) - (hash-table-keys test-registery)) - ", ")) + (if (and (null? tal)(null? reg)) + (loop hed tal reg reruns) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (let ((newl (append reg (list hed)))) + (if regfull + (cdr newl) + newl)) + reruns))) + ;; At this point hed test registration must be completed. + ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) + 'start) + (debug:print-info 0 "Waiting on test registration(s): " (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) (thread-sleep! 0.1) - (loop hed tal reruns)) + (loop hed tal reg reruns)) ((not have-resources) ;; simply try again after waiting a second (debug:print-info 1 "no resources to run new tests, waiting ...") ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. (thread-sleep! 1) ;; (+ 2 *global-delta*)) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests - (loop (car newtal)(cdr newtal) reruns)) + (loop (car newtal)(cdr newtal) reg reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id runname keyvallst test-record flags #f) - (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'running) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns))) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns))) (else ;; must be we have unmet prerequisites (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. (if (null? fails) @@ -467,26 +517,29 @@ (begin ;; couldn't run, take a breather (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient ;; we made new tal by sticking hed at the back of the list - (loop (car newtal)(cdr newtal) reruns)) + (loop (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) - (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'removed) - (loop (car tal)(cdr tal) (cons hed reruns))) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! (+ 0.01 *global-delta*)) - (loop hed tal reruns))))))))) ;; END OF INNER COND + (loop hed tal reg reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) @@ -509,11 +562,14 @@ items) (if (not (null? tal)) (begin (debug:print-info 4 "End of items list, looping with next after short delay") ;; (thread-sleep! (+ 0.01 *global-delta*)) - (loop (car tal)(cdr tal) reruns)))) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (runs:can-run-more-tests test-record max-concurrent-jobs))) @@ -547,11 +603,11 @@ (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) ;; (thread-sleep! *global-delta*) - (loop hed tal reruns)) + (loop hed tal reg reruns)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") @@ -562,30 +618,36 @@ ;; TRY (thread-sleep! (+ 2 *global-delta*)) ;; TRY (thread-sleep! (+ 0.01 *global-delta*))) (set! num-retries (+ num-retries 1)))) (if (> num-retries max-retries) (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns)) - (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)) + (loop (car newtal)(cdr newtal) reg reruns))) ;; an issue with prereqs not yet met? ((and (not (null? fails))(eq? testmode 'normal)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (if (not (null? tal)) (begin ;; (thread-sleep! *global-delta*) - (loop (car tal)(cdr tal)(cons hed reruns))))) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + (cons hed reruns))))) (else (debug:print 8 "ERROR: No handler for this condition.") ;; TRY (thread-sleep! (+ 1 *global-delta*)) - (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE + (loop (car newtal)(cdr newtal) reg reruns)))) ;; END OF IF CAN RUN MORE ;; if can't run more just loop with next possible test (begin (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) ;; (thread-sleep! (+ 2 *global-delta*)) - (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) + (loop (car newtal)(cdr newtal) reg reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) @@ -597,13 +659,16 @@ (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 *global-delta*)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked - (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) + (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) ((not (null? tal)) (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) + ((not (null? reg)) ;; could we get here with leftovers? + (debug:print-info 0 "Have leftovers!") + (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) )))) ;; LET* ((test-record ;; we get here on "drop through" - loop for next test in queue ADDED tests/fdktestqa/testqa/Makefile Index: tests/fdktestqa/testqa/Makefile ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/Makefile @@ -0,0 +1,3 @@ + +all : + megatest -runtests % -target a/b :runname c Index: tests/fdktestqa/testqa/tests/bigrun/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/testconfig +++ tests/fdktestqa/testqa/tests/bigrun/testconfig @@ -7,11 +7,11 @@ # waiton setup priority 0 # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 150)(loop (+ a 1)(cons a res)) res)) >)) " ")} +NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 4500)(loop (+ a 1)(cons a res)) res)) >)) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt