Changes In Branch v1.64-farmedout-runtest Through [2c853b3d8d] Excluding Merge-Ins
This is equivalent to a diff from 5be250d6fc to 2c853b3d8d
2018-03-29
| ||
16:43 | error check MTESTHASH check-in: cb3bbc9d2e user: bjbarcla tags: v1.64 | |
2018-02-06
| ||
18:33 | wip check-in: 8f16df638a user: bb tags: v1.64-forked-launch | |
12:34 | instrumented runtestsqueue and test launcher for profiling Leaf check-in: 9b8a609dee user: bjbarcla tags: v1.64-instrumented-runtest | |
2018-02-02
| ||
17:54 | wip check-in: d3905e798c user: bjbarcla tags: v1.64-farmedout-runtest | |
17:28 | wip check-in: 2c853b3d8d user: bjbarcla tags: v1.64-farmedout-runtest | |
2018-01-17
| ||
21:03 | Merged in some of Jeff's changes to Makefile.deploy check-in: 6275b9b5c5 user: matt tags: v1.65 | |
2017-12-14
| ||
13:32 | Updated deploy Makefile check-in: 5be250d6fc user: jmoon18 tags: v1.64 | |
2017-12-12
| ||
14:34 | updated itemmap section in manual; hopefully it is clearer now check-in: c607976150 user: bjbarcla tags: v1.64 | |
Modified megatest.scm from [4ccc1620b9] to [50f303f608].
︙ | ︙ | |||
543 544 545 546 547 548 549 | (printf "Sending signal/term to ~A\n" pid) (process-signal pid signal/term)))))) (process:children #f)) (original-exit exit-code))))) ;; for some switches always print the command to stderr ;; | | | 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 | (printf "Sending signal/term to ~A\n" pid) (process-signal pid signal/term)))))) (process:children #f)) (original-exit exit-code))))) ;; for some switches always print the command to stderr ;; (if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-run-test") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) ;; some switches imply homehost. Exit here if not on homehost ;; (let ((homehost-required (list "-cleanup-db" "-server"))) (if (apply args:any? homehost-required) (if (not (common:on-homehost?)) |
︙ | ︙ | |||
1555 1556 1557 1558 1559 1560 1561 | ;; (launch:setup)) ;; (let* ((since-time (string->number (args:get-arg "-since"))) ;; (run-ids (db:get-changed-run-ids since-time))) ;; ;; (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) ;; (print (sort run-ids <)) ;; (set! *didsomething* #t))) | | > > > > > > > > > > > > > > > > > | | 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 | ;; (launch:setup)) ;; (let* ((since-time (string->number (args:get-arg "-since"))) ;; (run-ids (db:get-changed-run-ids since-time))) ;; ;; (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) ;; (print (sort run-ids <)) ;; (set! *didsomething* #t))) ;;====================================================================== ;; run single test ;;====================================================================== ;; launch test; executed from runloop ;; should only be called by megatest, not user. (if (args:get-arg "-internal-run-test") (general-run-call "-internal-run-test" "run single test; internal use only" (lambda (target runname keys keyvals) (let* ((flags args:arg-hash)) (run:test-standalone target runname keys keyvals flags) ) ))) ;;====================================================================== ;; full run ;;====================================================================== ;; get lock in db for full run for this directory ;; for all tests with deps ;; walk tree of tests to find head tasks ;; add head tasks to task queue ;; add dependant tasks to task queue ;; add remaining tasks to task queue ;; for each task in task queue ;; if have adequate resources ;; launch task ;; else ;; put task in deferred queue ;; if still ok to run tasks ;; process deferred tasks per above steps ;; run all tests are are Not COMPLETED and PASS or CHECK (if (or (args:get-arg "-runall") (args:get-arg "-run") (args:get-arg "-rerun-clean") (args:get-arg "-rerun-all") (args:get-arg "-runtests")) (let ((need-clean (or (args:get-arg "-rerun-clean") |
︙ | ︙ |
Modified runs.scm from [cd4ddfd17d] to [6b5db4b196].
︙ | ︙ | |||
331 332 333 334 335 336 337 | (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) ;; (tdbdat (tasks:open-db)) (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) | | > | 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) ;; (tdbdat (tasks:open-db)) (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (allowed-tests #f) (last-loop-top-time (current-seconds))) ;; check if readonly (when readonly-mode (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.") (exit 1)) ;; per user request. If less than 100Meg space on dbdir partition, bail out with error |
︙ | ︙ | |||
482 483 484 485 486 487 488 | (setenv "MT_TEST_NAME" hed) ;; (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry)) ((hed-mode) (let ((m (config-lookup config "requirements" "mode"))) (if m (map string->symbol (string-split m)) '(normal)))) ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) | > | > > > | 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 | (setenv "MT_TEST_NAME" hed) ;; (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry)) ((hed-mode) (let ((m (config-lookup config "requirements" "mode"))) (if m (map string->symbol (string-split m)) '(normal)))) ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) ((loop-delta-time) (- (current-seconds) last-loop-top-time)) ) (set! last-loop-top-time (current-seconds)) (BB> "RUNLOOPTOP | cycletime="loop-delta-time" depth="(add1 (length tal))" hed="hed) (debug:print-info 8 *default-log-port* "waitons: " waitons) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (or (member hed waitons) (member hed waitors)) (begin (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!") |
︙ | ︙ | |||
1237 1238 1239 1240 1241 1242 1243 | ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) | | | 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 | ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (rmt:find-and-mark-incomplete) (BB> "entered run-tests-queue") (let* ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) |
︙ | ︙ | |||
1278 1279 1280 1281 1282 1283 1284 | keyvals: keyvals run-info: run-info ;; newtal: newtal all-tests-registry: all-tests-registry ;; itemmaps: itemmaps ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running | | > > | > > | 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 | keyvals: keyvals run-info: run-info ;; newtal: newtal all-tests-registry: all-tests-registry ;; itemmaps: itemmaps ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running )) (rtq-looptop-lastvisit-time (current-seconds))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec)) (st (db:test-get-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (let ((cycletime (- (current-seconds) rtq-looptop-lastvisit-time))) (BB> "RTQ LOOP TOP | cycletime="cycletime"| qdepth="(add1 (length tal))" hed="hed)) (set! rtq-looptop-lastvisit-time (current-seconds)) (runs:incremental-print-results run-id) (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; moving this to a parallel thread and just run it once. ;; |
︙ | ︙ | |||
1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 | (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step ;; (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (test-conf (tests:testqueue-get-testconfig test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) | > > > > > > > > > > > > | | | | 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 | (define (runs:pretty-string lst) (map (lambda (t) (if (not (vector? t)) (conc t) (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) (define (run:test-standalone target runname keys keyvals flags) (let* ((run-id ...) (run-info ...) ) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step ;; (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; all-tests-registry - used to determine test path ;; All these vars might be referenced by the testconfig file reader ;; flags are a hash of k/v pairs that could become commandline switches to a standalone version (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (test-conf (tests:testqueue-get-testconfig test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) (test-path (hash-table-ref all-tests-registry test-name)) ;; path to test definition (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... (force (hash-table-ref/default flags "-force" #f)) ;; ignore some red flags; should be removed (rerun (hash-table-ref/default flags "-rerun" #f)) ;; used to decide to rerun if test already exists, always rerun (pretend not started) (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) (item-path "") (db #f) (full-test-name #f)) ;; setting itemdat to a list if it is #f |
︙ | ︙ | |||
1647 1648 1649 1650 1651 1652 1653 | (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) (begin (hash-table-set! *test-meta-updated* test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) | | | > > | | | 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 | (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) (begin (hash-table-set! *test-meta-updated* test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (rmt:get-test-id run-id test-name item-path)) ;; ?? necessary? (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop () ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (rmt:register-test run-id test-name item-path) ;; (set! test-id (rmt:get-test-id run-id test-name item-path)))) (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") (set! testdat (rmt:get-test-info-by-id run-id test-id)) (if (not testdat) (begin (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) (set! test-id (db:test-get-id testdat)) (if (common:file-exists? test-path) (change-directory test-path) (begin (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") (change-directory *toppath*))) (case (if force ;; (args:get-arg "-force") 'NOT_STARTED (if testdat (string->symbol (test:get-state testdat)) 'failed-to-insert)) ((failed-to-insert) ;; should never happen now (debug:print-error 0 *default-log-port* "Failed to insert the record into the db")) ((NOT_STARTED COMPLETED DELETED INCOMPLETE) (let ((runflag #f)) (cond ;; -force, run no matter what (force (set! runflag #t)) ;; NOT_STARTED, run no matter what |
︙ | ︙ | |||
1761 1762 1763 1764 1765 1766 1767 | (> numseconds time-since-last)) (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) | | | 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 | (> numseconds time-since-last)) (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) (if skip-test (begin (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) ;; HERE WE GO (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! *globalexitstatus* 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) |
︙ | ︙ |