Megatest: Check-in [624ddf95e6]

Overview

Comment:	made progress - test2 deadlock situation is broken
Downloads:	Tarball \| ZIP archive \| SQL archive
Timelines:	family \| ancestors \| descendants \| both \| v1.65-test2
Files:	files \| file ages \| folders
SHA1:	624ddf95e6b54a943a644c0b5e785c990880999f
User & Date:	bjbarcla on 2017-11-02 18:14:08
Other Links:	branch diff \| manifest \| tags

Context

2017-11-02
18:38		catch up with v1.65 tip check-in: 9f5f817c0e user: bjbarcla tags: v1.65-test2
18:14		made progress - test2 deadlock situation is broken check-in: 624ddf95e6 user: bjbarcla tags: v1.65-test2
18:06		made progress - test2 deadlock situation is broken check-in: 9a750784a3 user: bjbarcla tags: v1.65-test2

Changes

Hide Diffs Unified Diffs Ignore Whitespace Patch

Modified runs.scm from [b72555b9ec] to [0dfe3f4c47].

︙			︙
188 189 190 191 192 193 194 ~~195~~ 196 197 198 199 200 201 202	(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) ~~(thread-sleep! (cond~~ ((> (runs:dat-can-run-more-tests-count runsdat) 20) (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 default-log-port "waiting for tasks to complete, sleeping briefly ...")) (configf:lookup-number configdat "setup" "inter-test-delay" default: 0.1) ;; was 2 );; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id))	\|	188 189 190 191 192 193 194 195 196 197 198 199 200 201 202	(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)? ((> (runs:dat-can-run-more-tests-count runsdat) 20) (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 default-log-port "waiting for tasks to complete, sleeping briefly ...")) (configf:lookup-number configdat "setup" "inter-test-delay" default: 0.1) ;; was 2 );; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id))
︙			︙
636 637 638 639 640 641 642 ~~643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676~~ 677 678 679 680 681 682 683	(list (runs:queue-next-hed tal reg reglen regfull) ;; hed (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns (define runs:nothing-left-in-queue-count 0) ~~;; check if all remaining tests in test queue:~~ ~~;; 1) have unexpanded tests~~ ~~;; 2) are NOT STARTED~~ ~~;; 3) are itemized~~ ~~;; indicating run cannot proceed.~~ ~~;; (define (runs:check-for-itemized-stalemate run-id test-queue test-records)~~ ~~;; (let loop ((test-queue-left test-queue))~~ ~~;; (if (null? test-queue)~~ ~~;; #t~~ ~~;; (let* ((hed (car test-queue-left))~~ ~~;; (tal (cdr test-queue-left))~~ ~~;; (test-record (hash-table-ref test-records hed))~~ ~~;; (tconfig (tests:testqueue-get-testconfig test-record))~~ ~~;; (items (tests:testqueue-get-items test-record))~~ ~~;; (testmode (let ((m (config-lookup tconfig "requirements" "mode")))~~ ~~;; (if m (map string->symbol (string-split m)) '(normal))))~~ ~~;; (num-running (num-running (rmt:get-count-tests-running-for-run-id run-id)))~~ ~~;; (is-itemized (not (null? (lset-intersection mode '(itemwait itemmatch toplevel)))))~~ ~~;; )~~ ~~;; (cond~~ ~~;; ((> num-running 0) #f) ;; stuff is still in flight~~ ~~;; ((not is-itemized) #f) ;; not an itemized test~~ ~~;; ((> (irregex-split "/" hed ) 1) #f) ;; this is an item of an itemized test.~~ ~~;; ((not (or (procedure? items)(eq? items 'have-procedure))) #f) ;; items have been expanded~~ ~~;; ((not (equal? status ...)))~~ ~~;; ((not (tests are running or queued...)))~~ ~~;; (else (loop tal)))))))~~ ;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. on first pass, var not set, on second pass, ok. (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 default-log-port	< < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < <	636 637 638 639 640 641 642 643 644 645 646 647 648 649	(list (runs:queue-next-hed tal reg reglen regfull) ;; hed (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 default-log-port
︙			︙
706 707 708 709 710 711 712 ~~713~~ 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 ~~734~~ 735 736 737 738 739 740 741	(cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here ~~(~~BB>~~ "ei-1")~~ (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin (debug:print 0 default-log-port "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") (exit 0)) (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;; ((or (null? prereqs-not-met) (and (member 'toplevel testmode) (null? non-completed))) ~~(~~BB>~~ "ei-2")~~ (debug:print-info 4 default-log-port "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list)	\| \|	672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707	(cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 4 default-log-port "cond branch - " "ei-1") (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin (debug:print 0 default-log-port "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") (exit 0)) (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;; ((or (null? prereqs-not-met) (and (member 'toplevel testmode) (null? non-completed))) (debug:print-info 4 default-log-port "cond branch - " "ei-2") (debug:print-info 4 default-log-port "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list)
︙			︙
751 752 753 754 755 756 757 ~~758~~ 759 760 761 762 763 764 765	(begin (debug:print-error 0 default-log-port "The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ((and (null? fails) (null? prereq-fails) (not (null? non-completed))) ~~(~~BB>~~ "ei-3")~~ (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) prereqs-not-met))) ;; a prereq that is not found in allinqueue will be put in the notinqueue list ;;	\|	717 718 719 720 721 722 723 724 725 726 727 728 729 730 731	(begin (debug:print-error 0 default-log-port "The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ((and (null? fails) (null? prereq-fails) (not (null? non-completed))) (debug:print-info 4 default-log-port "cond branch - " "ei-3") (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) prereqs-not-met))) ;; a prereq that is not found in allinqueue will be put in the notinqueue list ;;
︙			︙
792 793 794 795 796 797 798 ~~799~~ 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 ~~820~~ 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 ~~839~~ 840 841 842 843 ~~844~~ 845 846 ~~847~~ 848 849 850 851 852 853 854	(runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider (null? prereq-fails) (null? non-completed)) ~~(~~BB>~~ "ei-4")~~ (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 0 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) ~~(~~BB>~~ "ei-5")~~ (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (begin (debug:print 4 default-log-port"BB> set PREQ_FAIL on "hed) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))) ;; BB: this works, btu equivalent for itemwait mode does not work. (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) ~~(~~BB>~~ "ei-6")~~ (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnables) ~~(~~BB>~~ "ei-7")~~ #f) ;; if we get here and non-completed is null then it is all over. (else ~~(~~BB>~~ "ei-8")~~ (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t)	\| \| \| \| \|	758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820	(runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider (null? prereq-fails) (null? non-completed)) (debug:print-info 4 default-log-port "cond branch - " "ei-4") (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 0 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 4 default-log-port "cond branch - " "ei-5") (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (begin (debug:print 4 default-log-port"BB> set PREQ_FAIL on "hed) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))) ;; BB: this works, btu equivalent for itemwait mode does not work. (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (debug:print-info 4 default-log-port "cond branch - " "ei-6") (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnables) (debug:print-info 4 default-log-port "cond branch - " "ei-7") #f) ;; if we get here and non-completed is null then it is all over. (else (debug:print-info 4 default-log-port "cond branch - " "ei-8") (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t)
︙			︙
1222 1223 1224 1225 1226 1227 1228 ~~1229~~ 1230 1231 1232 1233 1234 1235 1236	(debug:print 5 default-log-port "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) ~~(canary 'CANARY) ;; consider replacing by gensym~~ (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number configdat "setup" "max_concurrent_jobs" default: 50))	<	1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201	(debug:print 5 default-log-port "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number configdat "setup" "max_concurrent_jobs" default: 50))
︙			︙
1277 1278 1279 1280 1281 1282 1283 ~~1284~~ 1285 1286 1287 1288 1289 1290 1291	(st (db:test-get-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) ~~;;(tal (append (cdr sorted-test-names) (list canary)))~~ (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (runs:incremental-print-results run-id)	<	1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255	(st (db:test-get-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (runs:incremental-print-results run-id)
︙			︙
1305 1306 1307 1308 1309 1310 1311 ~~1312 1313 1314 1315~~ 1316 1317 1318 1319 1320 1321 1322	;; (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (set! last-time-incomplete (current-seconds)) ;; (rmt:find-and-mark-incomplete-all-runs) )) ~~;; (if (equal? hed canary)~~ ~~;; (cond~~ ~~;; (null?~~ ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (map string->symbol (string-split m)) '(normal))))	< < < <	1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282	;; (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (set! last-time-incomplete (current-seconds)) ;; (rmt:find-and-mark-incomplete-all-runs) )) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (map string->symbol (string-split m)) '(normal))))
︙			︙
1420 1421 1422 1423 1424 1425 1426 ~~1427~~ 1428 1429 1430 1431 1432 1433 ~~1434~~ 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 ~~1448~~ 1449 1450 1451 1452 1453 1454 1455	number? (map (lambda (waiton) (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run (not (member waiton reruns))) 1 #f)) waitons))))) ;; could do this more elegantly with a marker.... ~~(~~BB>~~ "rtq-1")~~ (debug:print 0 default-log-port "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) ~~(~~BB>~~ "rtq-2")~~ (debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done ~~(~~BB>~~ "rtq-3")~~ (debug:print-info 4 default-log-port "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1))	\| \| \|	1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415	number? (map (lambda (waiton) (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run (not (member waiton reruns))) 1 #f)) waitons))))) ;; could do this more elegantly with a marker.... (debug:print-info 4 default-log-port "cond branch - " "rtq-1") (debug:print 0 default-log-port "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) (debug:print-info 4 default-log-port "cond branch - " "rtq-2") (debug:print-info 4 default-log-port "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (debug:print-info 4 default-log-port "cond branch - " "rtq-3") (debug:print-info 4 default-log-port "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1))
︙			︙
1464 1465 1466 1467 1468 1469 1470 ~~1471 1472 1473~~ ~~1474 1475 1476~~ 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 ~~1492~~ 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 ~~1507~~ 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 ~~1522~~ 1523 1524 1525 ~~1526~~ 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 ~~1538~~ 1539 1540 ~~1541~~ 1542 1543 1544 ~~1545~~ 1546 1547 1548 1549 1550 1551 1552	(let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) items) )) (if (null? items-in-testpatt) ~~~~(begin~~ (let ((test-id (rmt:get-test-id run-id test-name "")))~~ (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern."))) ~~(BB> "NEED TO FLUNK OUT "hed))~~ (for-each (lambda (my-itemdat) (let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) ~~;; (debug:print-info 0 default-log-port "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items")~~ ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ~~(~~BB>~~ "rtq-4")~~ (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list) (debug:print-info 4 default-log-port " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) ~~(~~BB>~~ "rtq-5")~~ (debug:print-error 0 default-log-port "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) ~~(~~BB>~~ "rtq-6")~~ (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 default-log-port "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 global-delta)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) ((not (null? tal)) ~~(~~BB>~~ "rtq-7")~~ (debug:print-info 4 default-log-port "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? ~~(~~BB>~~ "rtq-8")~~ (debug:print-info 0 default-log-port "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else ~~(~~BB>~~ "rtq-9")~~ (debug:print-info 4 default-log-port "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))	< < \| > \| \| < \| \| \| \| \| \| \|	1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510	(let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) items) )) (if (null? items-in-testpatt) (let ((test-id (rmt:get-test-id run-id test-name ""))) (debug:print-info 0 default-log-port "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching test pattern -- marking status ZERO_ITEMS") (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern."))) (for-each (lambda (my-itemdat) (let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this is an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 default-log-port "cond branch - " "rtq-4") (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list) (debug:print-info 4 default-log-port " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print-info 4 default-log-port "cond branch - " "rtq-5") (debug:print-error 0 default-log-port "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (debug:print-info 4 default-log-port "cond branch - " "rtq-6") (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 default-log-port "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 global-delta)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) ((not (null? tal)) (debug:print-info 4 default-log-port "cond branch - " "rtq-7") (debug:print-info 4 default-log-port "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 4 default-log-port "cond branch - " "rtq-8") (debug:print-info 0 default-log-port "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 default-log-port "cond branch - " "rtq-9") (debug:print-info 4 default-log-port "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))
︙			︙