Megatest

Check-in [cd1100885b]
Login
Overview
Comment:Gate when first detect that all job slots are in use. No point in looping until slots are availble. ==7.1/1.6/WARN/1201/mars==
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-cleanup
Files: files | file ages | folders
SHA1: cd1100885b1bacd3f80504f23d4a962272484a4d
User & Date: matt on 2020-08-30 00:14:53
Original Comment: Gate when first detect that all job slots are in use. No point in looping until slots are availble.
Other Links: branch diff | manifest | tags
Context
2020-08-30
09:54
Gate tests running on load and max jobs right before launching check-in: 1fbf00b8f7 user: matt tags: v1.65-cleanup
00:14
Gate when first detect that all job slots are in use. No point in looping until slots are availble. ==7.1/1.6/WARN/1201/mars== check-in: cd1100885b user: matt tags: v1.65-cleanup
2020-08-29
09:22
Added waits to steps, get rid of defunct server stuff (let them die natural deaths) ==7.34/1.5/WARN/1202== check-in: f22d7f8cb6 user: mrwellan tags: v1.65-cleanup
Changes

Modified runs.scm from [f86e80cfc1] to [8b4951a591].

14
15
16
17
18
19
20
21

22
23
24
25
26
27
28
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
     posix-extras directory-utils pathname-expand typed-records format  sxml-serializer sxml-modifications)


(declare (unit runs))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))







|
>







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
     posix-extras directory-utils pathname-expand typed-records format  sxml-serializer
     sxml-modifications matchable)

(declare (unit runs))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)
    
        ;; -- removed BB 17ww28 - no longer needed.
	;; every 15 minutes verify the server is there for this run
	;; (if (and (common:low-noise-print 240 "try start server"  run-id)
	;; 	 (not (or (and *runremote*
	;; 		       (remote-server-url *runremote*)
	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))








<
<
<
<
<
<
<
<
<







1592
1593
1594
1595
1596
1597
1598









1599
1600
1601
1602
1603
1604
1605
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)
    









	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
		     "\n  num-retries: " num-retries
                     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
                     )

	(runs:parallel-runners-mgmt runsdat)

	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))







|







1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
		     "\n  num-retries: " num-retries
                     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
                     )

	;; (runs:parallel-runners-mgmt runsdat)

	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))
1686
1687
1688
1689
1690
1691
1692
1693
1694






















1695
1696
1697
1698
1699
1700
1701
1702
1703
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))
	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))

	  ;; This would be a good place to block on homehost load

























	  
	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	  (let ((loop-list (runs:process-expanded-tests runsdat testdat)))
            (if loop-list (apply loop loop-list))))

	 ;; items processed into a list but not came in as a list been processed
	 ;;
	 ((and (list? items)     ;; thus we know our items are already calculated
	       (not   itemdat))  ;; and not yet expanded into the list of things to be done







|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<







1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709

1710
1711
1712
1713
1714
1715
1716
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))
	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))

	  ;; This would be a good place to block on homehost load

	  ;; gonna try a strategy change here.
	  ;;
	  ;; check if can run more tests. if yes, continue, if no, rest for 10 seconds, check again
	  ;; repeat until can run more tests
	  ;;
	  ;; look at the test jobgroup and tot jobs running
	  (let loop-can-run-more
	      ((res      (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))
	       (remtries 60))
	    (match res
              ((run-more num-running . rem)
	       (if (or run-more
		       (< remtries 1))
		   (begin
		     (if (runs:lownoise "num-running" 30)
			 (debug:print-info 0 *default-log-port* "Have "num-running" tests of max " max-concurrent-jobs))
		     (runs:dat-can-run-more-tests-set! runsdat res)) ;; capture the result and then drop through
		   (begin
		     (if (runs:lownoise "num-running" 10)
			 (debug:print-info 0 *default-log-port* "Can't run more tests, have "num-running" tests of "
					   max-concurrent-jobs " allowed."))
		     (thread-sleep! 5) ;; if we've hit max concurrent jobs take a breather, nb// make this configurable
		     (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
					(- remtries 1)))))))
	      

	  (let ((loop-list (runs:process-expanded-tests runsdat testdat)))
            (if loop-list (apply loop loop-list))))

	 ;; items processed into a list but not came in as a list been processed
	 ;;
	 ((and (list? items)     ;; thus we know our items are already calculated
	       (not   itemdat))  ;; and not yet expanded into the list of things to be done