Overview
Comment: | Gate when first detect that all job slots are in use. No point in looping until slots are availble. ==7.1/1.6/WARN/1201/mars== |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-cleanup |
Files: | files | file ages | folders |
SHA1: |
cd1100885b1bacd3f80504f23d4a9622 |
User & Date: | matt on 2020-08-30 00:14:53 |
Original Comment: | Gate when first detect that all job slots are in use. No point in looping until slots are availble. |
Other Links: | branch diff | manifest | tags |
Context
2020-08-30
| ||
09:54 | Gate tests running on load and max jobs right before launching check-in: 1fbf00b8f7 user: matt tags: v1.65-cleanup | |
00:14 | Gate when first detect that all job slots are in use. No point in looping until slots are availble. ==7.1/1.6/WARN/1201/mars== check-in: cd1100885b user: matt tags: v1.65-cleanup | |
2020-08-29
| ||
09:22 | Added waits to steps, get rid of defunct server stuff (let them die natural deaths) ==7.34/1.5/WARN/1202== check-in: f22d7f8cb6 user: mrwellan tags: v1.65-cleanup | |
Changes
Modified runs.scm from [f86e80cfc1] to [8b4951a591].
︙ | ︙ | |||
14 15 16 17 18 19 20 | ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) | | > | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 | ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format sxml-serializer sxml-modifications matchable) (declare (unit runs)) (declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) |
︙ | ︙ | |||
1591 1592 1593 1594 1595 1596 1597 | testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) | < < < < < < < < < | 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 | testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) |
︙ | ︙ | |||
1646 1647 1648 1649 1650 1651 1652 | "\n num-retries: " num-retries "\n reruns: " reruns "\n regfull: " regfull "\n reglen: " reglen "\n length reg: " (length reg) ) | | | 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 | "\n num-retries: " num-retries "\n reruns: " reruns "\n regfull: " regfull "\n reglen: " reglen "\n length reg: " (length reg) ) ;; (runs:parallel-runners-mgmt runsdat) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (member test-name waitons) (begin (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) |
︙ | ︙ | |||
1686 1687 1688 1689 1690 1691 1692 | (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) ;; This would be a good place to block on homehost load | | | > > > > > > > > > > > > > > > > > > > > > > | < | 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 | (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) (not (null? tal))) (loop (car tal)(cdr tal) reg reruns)) (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) ;; This would be a good place to block on homehost load ;; gonna try a strategy change here. ;; ;; check if can run more tests. if yes, continue, if no, rest for 10 seconds, check again ;; repeat until can run more tests ;; ;; look at the test jobgroup and tot jobs running (let loop-can-run-more ((res (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) (remtries 60)) (match res ((run-more num-running . rem) (if (or run-more (< remtries 1)) (begin (if (runs:lownoise "num-running" 30) (debug:print-info 0 *default-log-port* "Have "num-running" tests of max " max-concurrent-jobs)) (runs:dat-can-run-more-tests-set! runsdat res)) ;; capture the result and then drop through (begin (if (runs:lownoise "num-running" 10) (debug:print-info 0 *default-log-port* "Can't run more tests, have "num-running" tests of " max-concurrent-jobs " allowed.")) (thread-sleep! 5) ;; if we've hit max concurrent jobs take a breather, nb// make this configurable (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) (- remtries 1))))))) (let ((loop-list (runs:process-expanded-tests runsdat testdat))) (if loop-list (apply loop loop-list)))) ;; items processed into a list but not came in as a list been processed ;; ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done |
︙ | ︙ |