Megatest: Diff

Differences From Artifact [642d238014]:

File runs.scm — part of check-in [2bb0d5f0ff] at 2017-07-10 18:08:14 on branch v1.64 — Added lock between test launch and removing test data (user: mrwellan, size: 111986) [annotate] [blame] [check-ins using]

To Artifact [147cbc54ec]:

File runs.scm — part of check-in [32584d6c1d] at 2017-08-28 11:42:54 on branch v1.64 — Cleaned up couple more named loop calls in runs.scm. Added post-run-hook. (user: matt, size: 113164) [annotate] [blame] [check-ins using] [more...]

︙			︙
264 265 266 267 268 269 270 271 272 273 274 275 276 277	(debug:print 0 default-log-port "Message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 default-log-port "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 default-log-port "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 default-log-port "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 default-log-port "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target))	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > >	264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316	(debug:print 0 default-log-port "Message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 default-log-port "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 default-log-port "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 default-log-port "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 default-log-port "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) (define (runs:run-post-hook run-id) (let* ((run-post-hook (configf:lookup configdat "runs" "post-hook")) (existing-tests (if run-post-hook (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses #f #f ;; offset limit #f ;; not-in #f ;; sort-by #f ;; sort-order #f ;; get full data (not 'shortlist) 0 ;; (runs:gendat-inc-results-last-update runs:general-data) ;; last update time 'dashboard) '())) (log-dir (conc toppath "/logs")) (log-file (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) (full-log-fname (conc log-dir "/" log-file))) (if run-post-hook ;; (if (null? existing-tests) ;; (debug:print 0 default-log-port "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) (let* ((use-log-dir (if (not (directory-exists? log-dir)) (handle-exceptions exn (begin (debug:print 0 default-log-port "WARNING: Failed to create " log-dir) #f) (create-directory log-dir #t) #t) #t)) (start-time (current-seconds)) (actual-logf (if use-log-dir full-log-fname log-file))) (handle-exceptions exn (begin (print-call-chain default-log-port) (debug:print 0 default-log-port "Message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 default-log-port "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) (debug:print-info 0 default-log-port "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) (system (conc run-post-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 default-log-port "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target))
︙			︙
324 325 326 327 328 329 330 ~~331 332 333~~ 334 335 336 337 338 339 340	(exit 4))))) (thread-start! th2) (thread-start! th1) (thread-join! th2))))) (set-signal-handler! signal/int sighand) (set-signal-handler! signal/term sighand)) ~~;; force the starting of a server (debug:print 0 default-log-port "waiting on server...") (server:start-and-wait toppath)~~ (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (set! runconf (if (common:file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals target) (begin (debug:print 0 default-log-port "WARNING: You do not have a run config file: " runconfigf) #f)))	\| \| \|	363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379	(exit 4))))) (thread-start! th2) (thread-start! th1) (thread-join! th2))))) (set-signal-handler! signal/int sighand) (set-signal-handler! signal/term sighand)) ;; force the starting of a server -- removed BB 17ww28 - no longer needed. ;;(debug:print 0 default-log-port "waiting on server...") ;;(server:start-and-wait toppath) (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (set! runconf (if (common:file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals target) (begin (debug:print 0 default-log-port "WARNING: You do not have a run config file: " runconfigf) #f)))
︙			︙
519 520 521 522 523 524 525 ~~526 527 528 529 530 531 532 533 534 535~~ 536 537 538 539 540 541 542	(handle-exceptions exn (begin (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn))) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))) ~~;; (handle-exceptions~~ ~~;; exn~~ ~~;; (begin~~ ~~;; (print-call-chain (current-error-port))~~ ~~;; (debug:print-error 0 default-log-port "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))~~ ~~;; (if (> run-queue-retries 0)~~ ~~;; (begin~~ ~~;; (set! run-queue-retries (- run-queue-retries 1))~~ ~~;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))))~~ ~~;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))~~ "runs:run-tests-queue")) (th2 (make-thread (lambda () ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (if keep-going (handle-exceptions	< < < < < < < < < <	558 559 560 561 562 563 564 565 566 567 568 569 570 571	(handle-exceptions exn (begin (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn))) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))) "runs:run-tests-queue")) (th2 (make-thread (lambda () ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (if keep-going (handle-exceptions
︙			︙
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607	(define (runs:queue-next-reg tal reg n regfull) (if regfull (cdr reg) (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal '() reg))) (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 default-log-port	> > > > > > > > >	621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645	(define (runs:queue-next-reg tal reg n regfull) (if regfull (cdr reg) (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal '() reg))) ;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 ;; (define (runs:loop-values tal reg reglen regfull reruns) (list (runs:queue-next-hed tal reg reglen regfull) ;; hed (runs:queue-next-tal tal reg reglen regfull) ;; tal (runs:queue-next-reg tal reg reglen regfull) ;; reg reruns)) ;; reruns (define runs:nothing-left-in-queue-count 0) ;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this. on first pass, var not set, on second pass, ok. (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 default-log-port
︙			︙
633 634 635 636 637 638 639 ~~640 641 642 643~~ 644 645 646 647 648 649 650	((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~ (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin	< \| < <	671 672 673 674 675 676 677 678 679 680 681 682 683 684 685	((and (not (member 'toplevel testmode)) (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) '(DONOTRUN removed CANNOTRUN))) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here (debug:print-info 1 default-log-port "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") (if (or (not (null? tal)) (not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) (begin (debug:print-info 0 default-log-port "Nothing left in the queue!") ;; If get here twice then we know we've tried to expand all items ;; since there must be a logic issue with the handling of loops in the ;; items expand phase we will brute force an exit here. (if (> runs:nothing-left-in-queue-count 2) (begin
︙			︙
709 710 711 712 713 714 715 ~~716 717 718 719~~ 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 ~~738 739 740 741~~ 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 ~~758 759 760 761~~ 762 763 764 765 766 767 768 769 770 ~~771 772 773 774~~ 775 776 777 778 779 780 781	(let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f ~~(list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)~~ (runs:que~~ue-next-tal~~ trimmed-tal trimmed-reg reglen regfull) ~~(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)~~ ~~reruns)~~)) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) (null? prereq-fails) (null? non-completed)) (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) ~~~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~)))~~ ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) ~~~~(list (runs:queue-next-hed tal reg reglen regfull)~~ ~~(runs:queue-next-tal tal reg reglen regfull)~~ ~~(runs:queue-next-reg tal reg reglen regfull)~~ (cons hed reruns)))~~ #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over. (else (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") ~~;; (list (runs:queue-next-hed tal reg reglen regfull)~~ ~~;; (runs:queue-next-tal tal reg reglen regfull)~~ ~~;; (runs:queue-next-reg tal reg reglen regfull)~~ ~~;; reruns)~~ (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond	< \| < \| < \| < \| < < < \| > < < < <	744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806	(let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) )) (list (car newtal)(append (cdr newtal) reg) '() reruns)))) ((and (null? fails) (null? prereq-fails) (null? non-completed)) (if (runs:can-keep-running? hed 20) (begin (runs:inc-cant-run-tests hed) (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default seen-cant-run-tests hed 0)) ;; getting here likely means the system is way overloaded, kill a full minute before continuing (thread-sleep! 60) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 default-log-port "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) (runs:loop-values tal reg reglen regfull reruns) ))) ((and (or (not (null? fails)) (not (null? prereq-fails))) (member 'normal testmode)) (debug:print-info 1 default-log-port "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (if (not (null? prereq-fails)) (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) (if (or (not (null? reg))(not (null? tal))) (begin (hash-table-set! test-registry hed 'CANNOTRUN) (runs:loop-values tal reg reglen regfull (cons hed reruns)) ) #f)) ;; #f flags do not loop ((and (not (null? fails))(member 'toplevel testmode)) (if (or (not (null? reg))(not (null? tal))) (list (car newtal)(append (cdr newtal) reg) '() reruns) #f)) ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over. (else (debug:print 0 default-log-port "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() (map (lambda (t) (cond
︙			︙
871 872 873 874 875 876 877 ~~878 879 880 881~~ 882 883 884 885 886 887 888	;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 default-log-port "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) (if (or (not (null? tal))(not (null? reg))) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~ #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (debug:print-info 4 default-log-port "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs	< \| < <	896 897 898 899 900 901 902 903 904 905 906 907 908 909 910	;; Check item path against item-patts, ;; ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 default-log-port "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) (if (or (not (null? tal))(not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) (debug:print-info 4 default-log-port "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? transport-type 'fs) ;; no point in parallel registration if use fs
︙			︙
899 900 901 902 903 904 905 ~~906~~ 907 908 909 910 911 912 913	(begin (rmt:register-test run-id test-name "") (if (rmt:get-test-id run-id test-name "") (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg (if regfull (append (cdr reg) (list hed)) (append reg (list hed))) reruns)))	\|	921 922 923 924 925 926 927 928 929 930 931 932 933 934 935	(begin (rmt:register-test run-id test-name "") (if (rmt:get-test-id run-id test-name "") (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg (if regfull (append (cdr reg) (list hed)) (append reg (list hed))) reruns)))
︙			︙
956 957 958 959 960 961 962 ~~963 964 965 966~~ 967 968 969 970 971 972 973	(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns)~~ #f)) ;; must be we have unmet prerequisites ;; (else (debug:print 4 default-log-port "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue	< \| < <	978 979 980 981 982 983 984 985 986 987 988 989 990 991 992	(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) #f)) ;; must be we have unmet prerequisites ;; (else (debug:print 4 default-log-port "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue
︙			︙
995 996 997 998 999 1000 1001 ~~1002 1003 1004 1005 1006~~ 1007 1008 1009 1010 1011 1012 ~~1013 1014 1015 1016~~ 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 ~~1027 1028 1029 1030 1031 1032 1033~~ 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 ~~1044 1045 1046 1047~~ 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 ~~1061~~ 1062 1063 ~~1064 1065 1066 1067~~ 1068 1069 1070 1071 1072 1073 1074	(let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) ~~(list (runs:queue-next-hed tal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ tal reg reglen regfull) ~~(runs:queue-next-reg tal reg reglen regfull)~~ ~~reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?~~ )) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond ((member "RUNNING" (map db:test-get-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal new~~tal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns))~~ ((or (not nth-try) (and (number? nth-try) (< nth-try 10))) (hash-table-set! test-registry hed (if (number? nth-try) (+ nth-try 1) 0)) (if (runs:lownoise (conc "not removing test " hed) 60) (debug:print 1 default-log-port "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ~~;; (list hed tal reg reruns)~~ ~~;; (list (car newtal)(cdr newtal) reg reruns)~~ ~~;; (hash-table-set! test-registry hed 'removed)~~ ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ newtal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns))~~ ((symbol? nth-try) (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) (hash-table-set! test-registry hed 0) ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ newtal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns))))~~ (else (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 default-log-port " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; can't drop this - maybe running? Just keep trying ~~(let ((runable-tests (runs:runable-tests prereqs-not-met)))~~ (if (null? runable-tests) #f ;; I think we are truly done here ~~(list (runs:queue-next-hed newtal reg reglen regfull)~~ (runs:que~~ue-next-tal~~ newtal reg reglen regfull) ~~(runs:queue-next-reg newtal reg reglen regfull)~~ ~~reruns)))))))))~~ ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t)) t	< \| < < < < \| < < < < < < \| < < < \| < < \| < \| < <	1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074	(let ((test-id (rmt:get-test-id run-id hed ""))) (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) ;; This next is for the items (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) (runs:loop-values tal reg reglen regfull reruns)) (let ((nth-try (hash-table-ref/default test-registry hed 0))) (cond ((member "RUNNING" (map db:test-get-state prereqs-not-met)) (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) (thread-sleep! 4) (runs:loop-values tal reg reglen regfull reruns)) ((or (not nth-try) (and (number? nth-try) (< nth-try 10))) (hash-table-set! test-registry hed (if (number? nth-try) (+ nth-try 1) 0)) (if (runs:lownoise (conc "not removing test " hed) 60) (debug:print 1 default-log-port "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) (runs:loop-values newtal reg reglen regfull reruns)) ((symbol? nth-try) (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) (hash-table-set! test-registry hed 0) (runs:loop-values newtal reg reglen regfull)))) (else (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) (debug:print 0 default-log-port "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 default-log-port " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; can't drop this - maybe running? Just keep trying (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? (if (null? runable-tests) #f ;; I think we are truly done here (runs:loop-values newtal reg reglen regfull reruns))))))))) ;; scan a list of tests looking to see if any are potentially runnable ;; (define (runs:runable-tests tests) (filter (lambda (t) (if (not (vector? t)) t
︙			︙
1269 1270 1271 1272 1273 1274 1275 1276 ~~1277 1278 1279 1280 1281 1282~~ 1283 1284 1285 1286 1287 1288 1289	testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) ;; every 15 minutes verify the server is there for this run (if (and (common:low-noise-print 240 "try start server" run-id) (not (or (and runremote (remote-server-url runremote) (server:ping (remote-server-url runremote))) (server:check-if-running toppath)))) (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash))	> \| \| \| \| \| \|	1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290	testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) ;; -- removed BB 17ww28 - no longer needed. ;; every 15 minutes verify the server is there for this run ;; (if (and (common:low-noise-print 240 "try start server" run-id) ;; (not (or (and runremote ;; (remote-server-url runremote) ;; (server:ping (remote-server-url runremote))) ;; (server:check-if-running toppath)))) ;; (server:kind-run toppath)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash))
︙			︙
1407 1408 1409 1410 1411 1412 1413 ~~1414~~ 1415 1416 1417 1418 1419 1420 1421	(if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ~~((or (procedure? items)(eq? items 'have-procedure))~~ (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test	\|	1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422	(if (null? tal) #f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test
︙			︙
1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477	(if (not (eq? num-running prev-num-running)) (debug:print-info 0 default-log-port "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) (thread-sleep! 5) ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! (debug:print-info 1 default-log-port "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test)	>	1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479	(if (not (eq? num-running prev-num-running)) (debug:print-info 0 default-log-port "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) (thread-sleep! 5) ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! (runs:run-post-hook run-id) (debug:print-info 1 default-log-port "All tests launched"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) (not (member (db:test-get-status test)
︙			︙
1961 1962 1963 1964 1965 1966 1967 ~~1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980~~ 1981 1982 1983 1984 1985 1986 1987	(let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir) ;; (resolve-pathname run-dir) (common:nice-path run-dir) #f)) (clean-mode (or mode 'remove-all)) (test-id (db:test-get-id test)) (lock-key (conc "test-" test-id)) (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds (if (car lock) #t (if (> (current-seconds) expire-time) (begin (debug:print-info 0 default-log-port "Timed out waiting for a lock to clean test with id " test-id) (rmt:no-sync-del! lock-key) ;; destroy the lock (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; (begin (thread-sleep! 1) (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 default-log-port "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5)	\| \| \| \| \| \| \| \| \| \| \| \| \| >	1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990	(let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir) ;; (resolve-pathname run-dir) (common:nice-path run-dir) #f)) (clean-mode (or mode 'remove-all)) (test-id (db:test-get-id test)) ;; (lock-key (conc "test-" test-id)) ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) ;; (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds ;; (if (car lock) ;; #t ;; (if (> (current-seconds) expire-time) ;; (begin ;; (debug:print-info 0 default-log-port "Timed out waiting for a lock to clean test with id " test-id) ;; (rmt:no-sync-del! lock-key) ;; destroy the lock ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; ;; (begin ;; (thread-sleep! 1) ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) ) (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 default-log-port "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5)
︙			︙
2014 2015 2016 2017 2018 2019 2020 ~~2021~~ 2022 2023 2024 2025 2026 2027 2028	(debug:print 0 default-log-port "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) ~~(rmt:no-sync-del! lock-key)))~~ ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code	\| >	2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032	(debug:print 0 default-log-port "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) ;; (rmt:no-sync-del! lock-key) )) ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup ;; this wrapper is used to reduce the replication of code
︙			︙