Overview
Comment: | improved preq_fail handling |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64-itemflow2 |
Files: | files | file ages | folders |
SHA1: | 70b2187dab77a1df6008957566af81e8 |
User & Date: | bjbarcla on 2017-10-06 17:16:17 |
Other Links: | branch diff | manifest | tags |
Context
2017-10-06
| ||
17:19 | improved preq_fail handling check-in: 109f029d2c user: bjbarcla tags: v1.64-itemflow2 | |
17:16 | improved preq_fail handling check-in: 70b2187dab user: bjbarcla tags: v1.64-itemflow2 | |
14:55 | db:get-prereqs-not-met is improved. check-in: 3c05b69ebe user: bjbarcla tags: v1.64-itemflow2 | |
Changes
Modified runs.scm from [860bf0d110] to [657e2feedd].
︙ | ︙ | |||
602 603 604 605 606 607 608 | ;; loop with (car reg) tal (cdr reg) reruns ;; If tal is empty ;; but have items in reg; loop with (car reg)(cdr reg) '() reruns ;; If reg is empty => all done (define (runs:queue-next-hed tal reg n regfull) (if regfull | | | | | | 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 | ;; loop with (car reg) tal (cdr reg) reruns ;; If tal is empty ;; but have items in reg; loop with (car reg)(cdr reg) '() reruns ;; If reg is empty => all done (define (runs:queue-next-hed tal reg n regfull) (if regfull (if (null? reg) #f (car reg)) (if (null? tal) ;; tal is used up, pop from reg (if (null? reg) #f (car reg)) (car tal)))) (define (runs:queue-next-tal tal reg n regfull) (if regfull tal (if (null? tal) ;; must transfer from reg (if (null? reg) '() (cdr reg)) (cdr tal)))) (define (runs:queue-next-reg tal reg n regfull) (if regfull (if (null? reg) '() (cdr reg)) (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal '() reg))) ;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 ;; (define (runs:loop-values tal reg reglen regfull reruns) |
︙ | ︙ | |||
772 773 774 775 776 777 778 | (if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") (let ((test-id (rmt:get-test-id run-id hed ""))) | | | 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 | (if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) |
︙ | ︙ | |||
886 887 888 889 890 891 892 | (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup(list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) | | | 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 | (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup(list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs (runs:calc-fails prereqs-not-met) (begin (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) '()))) (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! (not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) |
︙ | ︙ | |||
1035 1036 1037 1038 1039 1040 1041 | (begin ;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again | > > > > > > | | | | > | | > | > | > > | | | | > > > | 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 | (begin ;; couldn't run, take a breather (if (runs:lownoise "Waiting for more work to do..." 60) (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (begin (let ((my-test-id (rmt:get-test-id run-id test-name item-path))) (mt:test-set-state-status-by-id run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2")) (if (or (not (null? reg))(not (null? tal))) (if (vector? hed) (begin (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path " from the launch list as it has prerequistes that are FAIL") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) ;; This next is for the items (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (hash-table-set! 
test-registry (db:test-make-full-name test-name item-path) 'removed) (runs:loop-values tal reg reglen regfull reruns)) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond ((member "RUNNING" (map db:test-get-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 0.1) (runs:loop-values tal reg reglen regfull reruns)) ((or (not nth-try) (and (number? nth-try) (< nth-try 2))) (hash-table-set! test-registry hed (if (number? nth-try) (+ nth-try 1) 0)) (if (runs:lownoise (conc "not removing test " hed) 60) (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (runs:loop-values newtal reg reglen regfull reruns)) ((symbol? nth-try) (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")) ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) (mt:test-set-state-status-by-testname run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) (hash-table-set! test-registry hed 'removed) ;; was 0 (if (not (or (null? reg) (null? 
tal))) (runs:loop-values tal reg reglen regfull reruns) #f)))) (else (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; ELSE: can't drop this - maybe running? Just keep trying ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))) ;;) ;;from old experiment ) ;; end if (or (not (null? reg))(not (null? tal))) )))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t)) t |
︙ | ︙ | |||
1445 1446 1447 1448 1449 1450 1451 | ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) | | | 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 | ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) |
︙ | ︙ | |||
1504 1505 1506 1507 1508 1509 1510 | ;; we get here on "drop through". All done! (runs:run-post-hook run-id) (debug:print-info 1 *default-log-port* "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) | | | | 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 | ;; we get here on "drop through". All done! (runs:run-post-hook run-id) (debug:print-info 1 *default-log-port* "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...* (not (member (db:test-get-status test) '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) prereqs-not-met)) (define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL (filter (lambda (test) (and (vector? test) ;; not (string? test)) (equal? (db:test-get-state test) "NOT_STARTED") (not (member (db:test-get-status test) '("n/a" "KEEP_TRYING"))))) prereqs-not-met)) |
︙ | ︙ |