Megatest

Check-in [cc163f91ad]
Login
Overview
Comment:Minor refactor of some runs.scm code?
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: cc163f91ada9ad2cb046a08bd05c59e192884712
User & Date: mrwellan on 2017-08-25 17:59:22
Other Links: branch diff | manifest | tags
Context
2017-08-28
11:42
Cleaned up couple more named loop calls in runs.scm. Added post-run-hook. check-in: 32584d6c1d user: matt tags: v1.64
2017-08-25
17:59
Minor refactor of some runs.scm code? check-in: cc163f91ad user: mrwellan tags: v1.64
2017-08-24
11:53
added whodunit script check-in: 53f5d1bc18 user: mrwellan tags: v1.64
Changes

Modified runs.scm from [df9cc9bbed] to [9cd64b3eea].

592
593
594
595
596
597
598







599
600
601
602
603
604
605
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612







+
+
+
+
+
+
+







;; Compute the next value of the `reg` list for the run loop.
;;
;;   tal     - remaining tail of the queue
;;   reg     - current reg list
;;   n       - accepted for signature parity with the other queue-next
;;             procedures; not consulted here
;;   regfull - true when reg is considered full
;;
;; Returns (cdr reg) when regfull is true, '() when reg is not full and tal
;; is empty (the reg contents are taken to have been moved into tal), and
;; reg unchanged otherwise.
(define (runs:queue-next-reg tal reg n regfull)
  (cond
   (regfull     (cdr reg))
   ((null? tal) '())
   (else        reg)))


;; Bundle the values for the next pass of the run loop into one list:
;;   (next-hed next-tal next-reg reruns)
;; The first three come from runs:queue-next-hed, runs:queue-next-tal and
;; runs:queue-next-reg, each called with tal, reg, reglen and regfull;
;; reruns is passed through untouched.
(define (runs:loop-values tal reg reglen regfull reruns)
  (let ((next-hed (runs:queue-next-hed tal reg reglen regfull))
        (next-tal (runs:queue-next-tal tal reg reglen regfull))
        (next-reg (runs:queue-next-reg tal reg reglen regfull)))
    (list next-hed next-tal next-reg reruns)))

;; Counter read by the "Nothing left in the queue!" handling, which takes its
;; brute-force exit path once this value exceeds 2.
;; NOTE(review): where it is incremented or reset is not visible here - confirm.
(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesn't work right, whereas [system echo #{targetvar}] does). Tal and Randy have tix on this. On the first pass the var is not set; on the second pass it is ok.
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
634
635
636
637
638
639
640
641
642

643
644
645
646
647
648
649
650
651
641
642
643
644
645
646
647


648


649
650
651
652
653
654
655







-
-
+
-
-








     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
          (runs:loop-values tal reg reglen regfull reruns)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  (begin
	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
	    ;; If get here twice then we know we've tried to expand all items
	    ;; since there must be a logic issue with the handling of loops in the 
	    ;; items expand phase we will brute force an exit here.
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin
710
711
712
713
714
715
716
717
718

719
720

721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740

741
742

743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762


763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
714
715
716
717
718
719
720


721


722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740


741


742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758




759
760
761
762
763
764
765
766
767
768
769




770
771
772
773
774
775
776







-
-
+
-
-
+


















-
-
+
-
-
+
















-
-
-
-
+
+









-
-
-
-








	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
                  (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)
			reruns)))
                  ))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))

     ((and (null? fails)
	   (null? prereq-fails)
	   (null? non-completed))
      (if  (runs:can-keep-running? hed 20)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
	    (thread-sleep! 60)
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
            (runs:loop-values tal reg reglen regfull reruns)
		  (runs:queue-next-reg tal reg reglen regfull)
		  reruns))))
            )))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  (cons hed reruns)))
            (runs:loop-values tal reg reglen regfull (cons hed reruns))
            )
	  #f)) ;; #f flags do not loop

     ((and (not (null? fails))(member 'toplevel testmode))
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over.
     (else
      (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
      ;; (list (runs:queue-next-hed tal reg reglen regfull)
      ;;   	(runs:queue-next-tal tal reg reglen regfull)
      ;;   	(runs:queue-next-reg tal reg reglen regfull)
      ;;   	reruns)
      (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond

Modified utils/whodunit.scm from [5a4e2fa6b3] to [862906085a].

1
2
3
4
5

6
7
8
9
10
11

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

27
28
29
30
31
32
33
1
2
3
4

5


6
7
8

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32




-
+
-
-



-
+















+







(use posix srfi-69)

(define *numsamples* (or (and (> (length (argv)) 1)
                              (string->number (cadr (argv))))
                         20))
                         3))

(print "Using " *numsamples* " as number of samples.")

(define (topdata)
  (with-input-from-pipe
   (conc "top -b -n " *numsamples* " -d 0.2")
   (conc "top -b -n " *numsamples* " -d 0.1")
   read-lines))

;; Reduce a list of text lines to the rows that start with a number.
;;
;;   topdat - list of strings, one per line (here: the output of `top -b`)
;;
;; Each line is split on whitespace and every field that parses as a number
;; is converted with string->number; other fields stay strings. A row is kept
;; only when it has at least one field and its first field is a number
;; (presumably the per-process rows, which begin with a PID - confirm).
;;
;; Returns the kept rows as a list of lists, in REVERSE input order because
;; rows are consed onto the accumulator. Returns '() for empty input.
(define (cleanup-data topdat)
  (let loop ((remaining topdat)
             (res       '()))
    (if (null? remaining)
        res
        (let* ((line-list (string-split (car remaining)))
               (nums      (map (lambda (indat) (or (string->number indat) indat))
                               line-list))
               ;; blank lines and header/summary lines do not start with a number
               (not-data  (or (null? nums)
                              (not (number? (car nums))))))
          (loop (cdr remaining)
                (if not-data res (cons nums res)))))))

;; Announce the sampling, then run it once at load time: (topdata) supplies
;; the raw lines and cleanup-data keeps only the rows it accepts.
(print "Getting " *numsamples* " samples of cpu usage data.")
(define data (cleanup-data (topdata)))
;; Two initially empty hash tables.
;; NOTE(review): names suggest they are keyed by PID and by user name and are
;; filled by the code that follows - confirm.
(define pidhash  (make-hash-table))
(define userhash (make-hash-table))

;; sum up and normalize the 
(for-each
 (lambda (indat)