Overview
Comment: | made progress - test2 deadlock situation is broken |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-test2 |
Files: | files | file ages | folders |
SHA1: |
9a750784a33ab9d714cdc68949906610 |
User & Date: | bjbarcla on 2017-11-02 18:06:40 |
Other Links: | branch diff | manifest | tags |
Context
2017-11-02
| ||
18:14 | made progress - test2 deadlock situation is broken check-in: 624ddf95e6 user: bjbarcla tags: v1.65-test2 | |
18:06 | made progress - test2 deadlock situation is broken check-in: 9a750784a3 user: bjbarcla tags: v1.65-test2 | |
2017-10-27
| ||
18:03 | wip check-in: 0706805720 user: bjbarcla tags: v1.65-test2 | |
Changes
Modified runs.scm from [304b43cbed] to [b72555b9ec].
︙ | ︙ | |||
182 183 184 185 186 187 188 189 190 191 192 193 194 195 | (if (> (- currtime lasttime) waitval) (begin (hash-table-set! *runs:denoise* key currtime) #t) #f))) (define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) (thread-sleep! (cond ((> (runs:dat-can-run-more-tests-count runsdat) 20) (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) | > > > | > | 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 | (if (> (- currtime lasttime) waitval) (begin (hash-table-set! *runs:denoise* key currtime) #t) #f))) (define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) (thread-sleep! (cond ((> (runs:dat-can-run-more-tests-count runsdat) 20) (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2 );; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) |
︙ | ︙ | |||
633 634 635 636 637 638 639 | (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns (define runs:nothing-left-in-queue-count 0) | | | | | > | > | < > > > | | | < < | | < < | < < | > > > > > | > > > | | | 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 | (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns (define runs:nothing-left-in-queue-count 0) ;; check if all remaining tests in test queue: ;; 1) have unexpanded tests ;; 2) are NOT STARTED ;; 3) are itemized ;; indicating run cannot proceed. ;; (define (runs:check-for-itemized-stalemate run-id test-queue test-records) ;; (let loop ((test-queue-left test-queue)) ;; (if (null? test-queue) ;; #t ;; (let* ((hed (car test-queue-left)) ;; (tal (cdr test-queue-left)) ;; (test-record (hash-table-ref test-records hed)) ;; (tconfig (tests:testqueue-get-testconfig test-record)) ;; (items (tests:testqueue-get-items test-record)) ;; (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) ;; (if m (map string->symbol (string-split m)) '(normal)))) ;; (num-running (num-running (rmt:get-count-tests-running-for-run-id run-id))) ;; (is-itemized (not (null? (lset-intersection mode '(itemwait itemmatch toplevel))))) ;; ) ;; (cond ;; ((> num-running 0) #f) ;; stuff is still in flight ;; ((not is-itemized) #f) ;; not an itemized test ;; ((> (irregex-split "/" hed ) 1) #f) ;; this is an item of an itemized test. ;; ((not (or (procedure? items)(eq? items 'have-procedure))) #f) ;; items have been expanded ;; ((not (equal? status ...))) ;; ((not (tests are running or queued...))) ;; (else (loop tal))))))) ;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. on first pass, var not set, on second pass, ok. (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) |
︙ | ︙ | |||
693 694 695 696 697 698 699 | "\n items: " items "\n can-run-more: " can-run-more) (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch | | > | > | > | 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 | "\n items: " items "\n can-run-more: " can-run-more) (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (BB> "ei-1") (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) (begin (debug:print-info 0 *default-log-port* "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") (exit 0)) (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) #f))) ;; ((or (null? prereqs-not-met) (and (member 'toplevel testmode) (null? non-completed))) (BB> "ei-2") (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (if (null? items-list) (let ((test-id (rmt:get-test-id run-id test-name "")) (num-items (rmt:test-toplevel-num-items run-id test-name))) (if (and test-id (not (> num-items 0))) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) (tests:testqueue-set-items! test-record items-list) (list hed tal reg reruns)) (begin (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this") (exit 1)))))) ((and (null? fails) (null? prereq-fails) (not (null? non-completed))) (BB> "ei-3") (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) (append newtal reruns))) ;; prereqstrs is a list of test names as strings that are prereqs for hed (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) prereqs-not-met))) ;; a prereq that is not found in allinqueue will be put in the notinqueue list ;; |
︙ | ︙ | |||
776 777 778 779 780 781 782 | (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) | | > | > > > > | > | 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 | (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider (null? prereq-fails) (null? non-completed)) (BB> "ei-4") (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (BB> "ei-5") (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (begin (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))) ;; BB: this works, btu equivalent for itemwait mode does not work. (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (BB> "ei-6") (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnables) (BB> "ei-7") #f) ;; if we get here and non-completed is null then it is all over. (else (BB> "ei-8") (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) |
︙ | ︙ | |||
1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 | (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) | > | 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 | (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (let* ((run-info (rmt:get-run-info run-id)) (canary 'CANARY) ;; consider replacing by gensym (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) |
︙ | ︙ | |||
1257 1258 1259 1260 1261 1262 1263 | (st (db:test-get-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) | > | > > | | | | | > > > > | 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 | (st (db:test-get-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) ;;(tal (append (cdr sorted-test-names) (list canary))) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (runs:incremental-print-results run-id) ;; ;; check if all remaining tests if ;; ;; 1) all tests remaining have unexpanded tests ;; ;; 2) all tests remaining are NOT STARTED ;; ;; 3) all tests remaining are itemized ;; (or (procedure? items)(eq? items 'have-procedure)) (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; moving this to a parallel thread and just run it once. ;; (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (set! last-time-incomplete (current-seconds)) ;; (rmt:find-and-mark-incomplete-all-runs) )) ;; (if (equal? hed canary) ;; (cond ;; (null? ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) (if m (map string->symbol (string-split m)) '(normal)))) |
︙ | ︙ | |||
1356 1357 1358 1359 1360 1361 1362 | (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) (debug:print 4 *default-log-port* "TOP OF LOOP => " "test-name: " test-name | < > > > | < | < > | 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 | (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) (runs:incremental-print-results run-id) (debug:print 4 *default-log-port* "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed "\n tal: " tal "\n reg: " reg "\n test-record " test-record "\n itemdat: " itemdat "\n items: " items "\n item-path: " item-path "\n waitons: " waitons "\n num-retries: " num-retries "\n reruns: " reruns "\n regfull: " regfull "\n reglen: " reglen "\n length reg: " (length reg) ) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (member test-name waitons) (begin (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) |
︙ | ︙ | |||
1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 | number? (map (lambda (waiton) (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run (not (member waiton reruns))) 1 #f)) waitons))))) ;; could do this more elegantly with a marker.... (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 *default-log-port* (map (lambda (row) (conc (string-intersperse (map (lambda (varval) (string-intersperse varval "=")) row) " ") "\n")) items))) | > > > | > > | > > > > > > > > > > > | | | | < | | | | | | | | > | > > > > > > | 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 | number? (map (lambda (waiton) (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run (not (member waiton reruns))) 1 #f)) waitons))))) ;; could do this more elegantly with a marker.... (BB> "rtq-1") (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") (hash-table-set! test-registry tfullname 'removed)) ;; items is #f then the test is ok to be handed off to launch (but not before) ;; ((not items) (BB> "rtq-2") (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (let ((loop-list (runs:process-expanded-tests runsdat testdat))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done (BB> "rtq-3") (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") ;; Must determine if the items list is valid. Discard the test if it is not. (if (and (list? items) (> (length items) 0) (and (list? (car items)) (> (length (car items)) 0)) (debug:debug-mode 1)) (debug:print 2 *default-log-port* (map (lambda (row) (conc (string-intersperse (map (lambda (varval) (string-intersperse varval "=")) row) " ") "\n")) items))) (let* ((items-in-testpatt (filter (lambda (my-itemdat) (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) items) )) (if (null? items-in-testpatt) (begin (let ((test-id (rmt:get-test-id run-id test-name ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern."))) (BB> "NEED TO FLUNK OUT "hed)) (for-each (lambda (my-itemdat) (let* ((new-test-record (let ((newrec (make-tests:testqueue))) (vector-copy! test-record newrec) newrec)) (my-item-path (item-list->path my-itemdat)) (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path (tests:testqueue-set-items! new-test-record #f) (tests:testqueue-set-itemdat! new-test-record my-itemdat) (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath items-in-testpatt))) ;; (debug:print-info 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items") ;; At this point we have possibly added items to tal but all must be handed off to ;; INNER COND logic. I think loop without rotating the queue ;; (loop hed tal reg reruns)) ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test ;; (loop (car newtal)(cdr newtal) reg reruns) (if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (BB> "rtq-4") (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list) (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (BB> "rtq-5") (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) (BB> "rtq-6") (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 *global-delta*)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) ((not (null? tal)) (BB> "rtq-7") (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (BB> "rtq-8") (debug:print-info 0 *default-log-port* "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (BB> "rtq-9") (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) |
︙ | ︙ |