Overview
Comment: | Save final test state and status in .final-status file in test directories. When considering marking a test DEAD, look at this file first to see if it actually passed, and if so, mark it PASSED. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65 |
Files: | files | file ages | folders |
SHA1: | 7dbdcbf5a0b70e2610cc6a033d855cf4 |
User & Date: | mmgraham on 2020-02-26 17:40:15 |
Other Links: | branch diff | manifest | tags |
Context
2020-02-27
| ||
17:04 | Better checking on parsing of steps check-in: 79f23e4a0a user: mrwellan tags: v1.65 | |
2020-02-26
| ||
17:40 | Save final test state and status in .final-status file in test directories. When considering marking a test DEAD, look at this file first to see if it actually passed, and if so, mark it PASSED. check-in: 7dbdcbf5a0 user: mmgraham tags: v1.65 | |
2020-02-24
| ||
20:48 | Replace install of loadrunner (useless) with mtrunner (useful). check-in: 4021551b19 user: matt tags: v1.65 | |
Changes
Modified db.scm from [8a077942c3] to [4dafe820e6].
︙ | ︙ | |||
1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 | #f #t))))) ;; given a launch delay (minimum time from last launch) return amount of time to wait ;; ;; (define (db:launch-delay-left dbstruct run-id launch-delay) ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) (server-start-allowance 200) (server-overloaded-budget 200) (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) (launch-monitor-on-time-budget 30) (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) ) (db:with-db dbstruct #f #f (lambda (db) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) | > > > > > > > > > > > > > > > > > > > > > > > > > | 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 
1668 1669 1670 1671 | #f #t))))) ;; given a launch delay (minimum time from last launch) return amount of time to wait ;; ;; (define (db:launch-delay-left dbstruct run-id launch-delay) (define (db:get-status-from-final-status-file run-dir) (let ( (infile (conc run-dir "/.final-status"))) ;; first verify we are able to write the output file (if (not (file-read-access? infile)) (begin (debug:print 0 *default-log-port* "ERROR: cannot read " infile) (debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir) #f ) (with-input-from-file infile read-lines) ) ) ) ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) ;; The default running-deadtime is 720 seconds = 12 minutes. ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30)) (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) (server-start-allowance 200) (server-overloaded-budget 200) (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) (launch-monitor-on-time-budget 30) (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) ) (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) (debug:print-info 4 *default-log-port* "deadtime-trim = " 
deadtime-trim) (db:with-db dbstruct #f #f (lambda (db) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) |
︙ | ︙ | |||
1655 1656 1657 1658 1659 1660 1661 1662 | (begin (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) (begin (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)))) db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');" | > | | | 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 | (begin (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) (begin (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)))) db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');" run-id running-deadtime) ;; default time 720 seconds (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path event-time run-duration) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? (begin (set! 
toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) (begin (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration) (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');" run-id remotehoststart-deadtime) ;; default time 230 seconds. ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") |
︙ | ︙ | |||
1708 1709 1710 1711 1712 1713 1714 | ;; incompleted)) (min-incompleted-ids (map car incompleted)) ;; do 'em all (all-ids (append min-incompleted-ids (map car oldlaunched)))) (if (> (length all-ids) 0) (begin (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD") (for-each | | > > > > > > | > > > | > > > > | > > > > > > > | 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 | ;; incompleted)) (min-incompleted-ids (map car incompleted)) ;; do 'em all (all-ids (append min-incompleted-ids (map car oldlaunched)))) (if (> (length all-ids) 0) (begin (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD") (for-each (lambda (test-id) (let* ( (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id)) (result (db:get-status-from-final-status-file run-dir))) (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) (begin (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD") (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "PASS" "Test stopped responding but it has PASSED; marking it PASS in the DB.") ) (begin (debug:print 0 *default-log-port* "INFO: test " test-id " final state/status is not COMPLETED/PASS. It is " result) (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.") ) ) ) ) all-ids) ) ) ) ) ) ) ) ;; ALL REPLACED BY THE BLOCK ABOVE ;; ;; (sqlite3:execute ;; db ;; (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" ;; (string-intersperse (map conc all-ids) ",") |
︙ | ︙ |
Modified launch.scm from [aaf31bf374] to [bdc92fbf76].
︙ | ︙ | |||
782 783 784 785 786 787 788 | ;; any previous runs ;; (db:test-remove-steps db run-id testname itemdat) ;; now is also a good time to write the .testconfig file (let* ((tconfig-fname (conc work-area "/.testconfig")) (tconfig-tmpfile (conc tconfig-fname ".tmp")) (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) (configf:write-alist tconfig tconfig-tmpfile) | | > | 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 | ;; any previous runs ;; (db:test-remove-steps db run-id testname itemdat) ;; now is also a good time to write the .testconfig file (let* ((tconfig-fname (conc work-area "/.testconfig")) (tconfig-tmpfile (conc tconfig-fname ".tmp")) (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) (configf:write-alist tconfig tconfig-tmpfile) (file-move tconfig-tmpfile tconfig-fname #t) (delete-file* ".final-status")) ;; (let* ((m (make-mutex)) (kill-job? #f) (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status (job-thread #f) ;; (keep-going #t) (misc-flags (let ((ht (make-hash-table))) |
︙ | ︙ | |||
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 | (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") (else "FAIL")))) ;; (db:test-get-status testinfo))) (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) (tests:test-set-status! run-id test-id new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) | > > > > | > > | 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 | (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") (else "FAIL")))) ;; (db:test-get-status testinfo))) (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) ;; Leave a .final-status file for each sub-test (tests:save-final-status run-id test-id) (tests:test-set-status! 
run-id test-id new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no ;; Leave a .final-status file for the top level test (tests:save-final-status run-id test-id) (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) (mutex-unlock! m) (launch:end-of-run-check run-id ) (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") (if (not (launch:einf-exit-status exit-info)) (exit 4)))) |
︙ | ︙ |
Modified runs.scm from [650378342c] to [d285c61578].
︙ | ︙ | |||
1994 1995 1996 1997 1998 1999 2000 | ;; then files other than *testdat.db* (directory-fold (lambda (f x) (let ((fullname (conc real-dir "/" f))) (if (not (string-search (regexp "testdat.db") f)) (runs:recursive-delete-with-error-msg fullname))) (+ 1 x)) | | | 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 | ;; then files other than *testdat.db* (directory-fold (lambda (f x) (let ((fullname (conc real-dir "/" f))) (if (not (string-search (regexp "testdat.db") f)) (runs:recursive-delete-with-error-msg fullname))) (+ 1 x)) 0 real-dir #t) ;; then the entire directory (runs:recursive-delete-with-error-msg real-dir)) ;; cleanup often needs to remove all but the last N runs per target ;; ;; target-patts a1/b1/c1,a2/b2/c2 ... ;; |
︙ | ︙ |
Modified tests.scm from [9b628bb958] to [52d412173f].
︙ | ︙ | |||
1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 | (if (eq? time-a time-b) (< id-a id-b) ;; (string<? (conc (vector-ref a 2)) ;; (conc (vector-ref b 2))) #f)) (string<? (conc time-a)(conc time-b))))))))) ;; summarize test in to a file test-summary.html in the test directory ;; (define (tests:summarize-test run-id test-id) (let* ((test-dat (rmt:get-test-info-by-id run-id test-id)) (out-dir (db:test-get-rundir test-dat)) (out-file (conc out-dir "/test-summary.html"))) | > > > > > > > > > > > > > > > > > > > | 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 | (if (eq? time-a time-b) (< id-a id-b) ;; (string<? (conc (vector-ref a 2)) ;; (conc (vector-ref b 2))) #f)) (string<? (conc time-a)(conc time-b))))))))) ;; Save test state and status in to a file .final-status in the test directory ;; (define (tests:save-final-status run-id test-id) (let* ((test-dat (rmt:get-test-info-by-id run-id test-id)) (out-dir (db:test-get-rundir test-dat)) (status-file (conc out-dir "/.final-status")) ) ;; first verify we are able to write the output file (if (not (file-write-access? out-dir)) (debug:print 0 *default-log-port* "ERROR: cannot write .final-status to " out-dir) (let* ((outp (open-output-file status-file)) (status (db:test-get-status test-dat)) (state (db:test-get-state test-dat))) (fprintf outp "~S\n" state) (fprintf outp "~S\n" status) (close-output-port outp))))) ;; summarize test in to a file test-summary.html in the test directory ;; (define (tests:summarize-test run-id test-id) (let* ((test-dat (rmt:get-test-info-by-id run-id test-id)) (out-dir (db:test-get-rundir test-dat)) (out-file (conc out-dir "/test-summary.html"))) |
︙ | ︙ | |||
1420 1421 1422 1423 1424 1425 1426 | (oup (open-output-file out-file)) (status (db:test-get-status test-dat)) (color (common:get-color-from-status status)) (logf (db:test-get-final_logf test-dat)) (steps-dat (tests:get-compressed-steps run-id test-id))) ;; (dcommon:get-compressed-steps #f 1 30045) ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log")) | | | 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 | (oup (open-output-file out-file)) (status (db:test-get-status test-dat)) (color (common:get-color-from-status status)) (logf (db:test-get-final_logf test-dat)) (steps-dat (tests:get-compressed-steps run-id test-id))) ;; (dcommon:get-compressed-steps #f 1 30045) ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log")) (s:output-new oup (s:html (s:title "Summary for " full-name) (s:body (s:h2 "Summary for " full-name) (s:table 'cellspacing "0" 'border "1" |
︙ | ︙ |