Overview
Comment: | end of run detection |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-end-of-run |
Files: | files | file ages | folders |
SHA1: |
e719f22355973db6ddadc5c9c809b196 |
User & Date: | pjhatwal on 2019-03-11 17:56:24 |
Other Links: | branch diff | manifest | tags |
Context
2019-03-18
| ||
10:57 | catch up with v1.65 Closed-Leaf check-in: fe83540f6a user: pjhatwal tags: v1.65-end-of-run | |
2019-03-11
| ||
17:56 | end of run detection check-in: e719f22355 user: pjhatwal tags: v1.65-end-of-run | |
17:55 | added options for lock unlck via mtutil check-in: f36ee6d2df user: pjhatwal tags: v1.65-end-of-run | |
Changes
Modified api.scm from [1541791de9] to [4abe8743ea].
︙ | ︙ | |||
78 79 80 81 82 83 84 | login tasks-get-last testmeta-get-record have-incompletes? synchash-get get-changed-record-ids get-run-record-ids | | > | | 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | login tasks-get-last testmeta-get-record have-incompletes? synchash-get get-changed-record-ids get-run-record-ids get-not-completed-cnt)) (define api:write-queries '( get-keys-write ;; dummy "write" query to force server start ;; SERVERS start-server kill-server ;; TESTS test-set-state-status-by-id delete-test-records delete-old-deleted-test-records test-set-state-status test-set-top-process-pid set-state-status-and-roll-up-items update-pass-fail-counts top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst") ;; RUNS register-run set-tests-state-status delete-run lock/unlock-run update-run-event_time mark-incomplete set-state-status-and-roll-up-run ;; STEPS teststep-set-status! ;; TEST DATA test-data-rollup csv->test-data |
︙ | ︙ | |||
191 192 193 194 195 196 197 198 199 200 201 202 203 204 | )) ((delete-test-records) (apply db:delete-test-records dbstruct params)) ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) ;; RUNS ((register-run) (apply db:register-run dbstruct params)) ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) ((delete-run) (apply db:delete-run dbstruct params)) | > | 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | )) ((delete-test-records) (apply db:delete-test-records dbstruct params)) ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) ;; RUNS ((register-run) (apply db:register-run dbstruct params)) ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) ((delete-run) (apply db:delete-run dbstruct params)) |
︙ | ︙ | |||
269 270 271 272 273 274 275 276 277 278 279 280 281 282 | ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) ((synchash-get) (apply synchash:server-get dbstruct params)) ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) ((get-test-times) (apply db:get-test-times dbstruct params)) ;; RUNS ((get-run-info) (apply db:get-run-info dbstruct params)) ((get-run-status) (apply db:get-run-status dbstruct params)) | > | 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) ((synchash-get) (apply synchash:server-get dbstruct params)) ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) ((get-test-times) (apply db:get-test-times dbstruct params)) ;; RUNS ((get-run-info) (apply db:get-run-info dbstruct params)) ((get-run-status) (apply db:get-run-status dbstruct params)) |
︙ | ︙ |
Modified dashboard.scm from [067e696739] to [2679042d5f].
︙ | ︙ | |||
155 156 157 158 159 160 161 162 163 164 165 166 167 168 | (debug:print 0 *default-log-port* "WARNING: Current policy requires running dashboard on homehost: " (common:get-homehost)))) ;; RA => Might require revert for filters ;; create a watch dog to move changes from lt/.db/*.db to megatest.db ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") (thread-start! (make-thread common:watchdog "Watchdog thread")) ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) ;;) | > | 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | (debug:print 0 *default-log-port* "WARNING: Current policy requires running dashboard on homehost: " (common:get-homehost)))) ;; RA => Might require revert for filters ;; create a watch dog to move changes from lt/.db/*.db to megatest.db ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") (thread-start! (make-thread common:watchdog "Watchdog thread")) ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) ;;) |
︙ | ︙ | |||
2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 | result)) (tabs (apply iup:tabs #:tabchangepos-cb (lambda (obj curr prev) (debug:catch-and-dump (lambda () (let* ((tab-num (dboard:commondat-curr-tab-num commondat)) (tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))) (dboard:tabdat-layout-update-ok-set! tabdat #f)) (dboard:commondat-curr-tab-num-set! commondat curr) (let* ((tab-num (dboard:commondat-curr-tab-num commondat)) (tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))) (dboard:commondat-please-update-set! commondat #t) (dboard:tabdat-layout-update-ok-set! tabdat #t))) "tabchangepos")) (dashboard:summary commondat stats-dat tab-num: 0) runs-view (dashboard:runs-summary commondat onerun-dat tab-num: 2) ;; (dashboard:new-view db data new-view-dat tab-num: 3) | > > | 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 | result)) (tabs (apply iup:tabs #:tabchangepos-cb (lambda (obj curr prev) (debug:catch-and-dump (lambda () (let* ((tab-num (dboard:commondat-curr-tab-num commondat)) (tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))) (dboard:tabdat-layout-update-ok-set! tabdat #f)) (dboard:commondat-curr-tab-num-set! commondat curr) (let* ((tab-num (dboard:commondat-curr-tab-num commondat)) (tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))) (dboard:commondat-please-update-set! commondat #t) (dboard:tabdat-layout-update-ok-set! tabdat #t))) "tabchangepos")) (dashboard:summary commondat stats-dat tab-num: 0) runs-view (dashboard:runs-summary commondat onerun-dat tab-num: 2) ;; (dashboard:new-view db data new-view-dat tab-num: 3) |
︙ | ︙ |
Modified db.scm from [bf6ebf1f66] to [6a0d8206a6].
︙ | ︙ | |||
2599 2600 2601 2602 2603 2604 2605 | user (conc newlockval " " run-id)) (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) (define (db:set-run-status dbstruct run-id status msg) (db:with-db dbstruct #f #f (lambda (db) | | > > > > > > > > | 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 | user (conc newlockval " " run-id)) (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) (define (db:set-run-status dbstruct run-id status msg) (db:with-db dbstruct #f #f (lambda (db) (if msg (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) (define (db:set-run-state-status dbstruct run-id state status ) (db:with-db dbstruct #f #f (lambda (db) (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) (define (db:get-run-status dbstruct run-id) (let ((res "n/a")) (db:with-db dbstruct #f #f (lambda (db) (sqlite3:for-each-row |
︙ | ︙ | |||
2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 | run-id #f (lambda (db) (sqlite3:first-result db "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;" run-id testname)))) (define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup) (if (not jobgroup) 0 ;; (let ((testnames '())) ;; get the testnames (db:with-db dbstruct #f #f | > > > > > > > > > > > > | 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 | run-id #f (lambda (db) (sqlite3:first-result db "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;" run-id testname)))) (define (db:get-not-completed-cnt dbstruct run-id) (db:with-db dbstruct run-id #f (lambda (db) (print "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=" run-id) (sqlite3:first-result db "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=?;" run-id)))) (define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup) (if (not jobgroup) 0 ;; (let ((testnames '())) ;; get the testnames (db:with-db dbstruct #f #f |
︙ | ︙ | |||
3665 3666 3667 3668 3669 3670 3671 | ;; status is the priority rollup of all completed statesfu ;; ;; if test-name is an integer work off that instead of test-name test-path ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) ;; establish info on incoming test followed by info on top level test ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met | < | 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 | ;; status is the priority rollup of all completed statesfu ;; ;; if test-name is an integer work off that instead of test-name test-path ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) ;; establish info on incoming test followed by info on top level test ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met (let* ((testdat (if (number? test-name) (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id (db:get-test-info dbstruct run-id test-name item-path))) (test-id (db:test-get-id testdat)) (test-name (if (number? test-name) (db:test-get-testname testdat) test-name)) |
︙ | ︙ | |||
3692 3693 3694 3695 3696 3697 3698 | (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test | > > > > > > > > > > > > > > > > > > > | | | | | | < | < < < | | | | | | | | | | | | | | | | | | | | | < | | | < < < < < | | < | | | | < | < | < < | > | > | > > | | > > | | | < < | | | > < < | | > | > > > > > > > > > > > > | 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 | (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test (state-stauses (db:roll-up-rules state-status-counts state status)) (newstate (car state-stauses)) (newstatus (cadr state-stauses))) (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " (apply conc (map (lambda (x) (conc (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) state-status-counts))); end debug:print (if tl-test-id (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct )))))) (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) tr-res))))) (define (db:roll-up-rules state-status-counts state status) (let* ((running (length (filter (lambda (x) (member (dbr:counts-state x) *common:running-states*)) state-status-counts))) (bad-not-started (length (filter (lambda (x) (and (equal? (dbr:counts-state x) "NOT_STARTED") (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) state-status-counts))) (all-curr-states (common:special-sort ;; worst -> best (sort of) (delete-duplicates (if (and state (not (member state *common:dont-roll-up-states*))) (cons state (map dbr:counts-state state-status-counts)) (map dbr:counts-state state-status-counts))) *common:std-states* >)) (all-curr-statuses (common:special-sort ;; worst -> best (delete-duplicates (if (and state status (not (member state *common:dont-roll-up-states*))) (cons status (map dbr:counts-status state-status-counts)) (map dbr:counts-status state-status-counts))) *common:std-statuses* >)) (non-completes (filter (lambda (x) (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) all-curr-states)) (preq-fails (filter (lambda (x) (equal? x "PREQ_FAIL")) all-curr-statuses)) (num-non-completes (length non-completes)) (newstate (cond ((> running 0) "RUNNING") ;; anything running, call the situation running ((> (length preq-fails) 0) "NOT_STARTED") ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED (else (car all-curr-states)))) (newstatus (cond ((> (length preq-fails) 0) "PREQ_FAIL") ((or (> bad-not-started 0) (and (equal? newstate "NOT_STARTED") (> num-non-completes 0))) "STARTED") (else (car all-curr-statuses))))) (debug:print-info 2 *default-log-port* "\n--> probe db:set-state-status-and-roll-up-items: " "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) "\n--> running: "running "\n--> bad-not-started: "bad-not-started "\n--> non-non-completes: "num-non-completes "\n--> non-completes: "non-completes "\n--> all-curr-states: "all-curr-states "\n--> all-curr-statuses: "all-curr-statuses "\n--> newstate "newstate "\n--> newstatus "newstatus "\n\n") ;; NB// Pass the db so it is part of the transaction (list newstate newstatus))) (define (db:set-state-status-and-roll-up-run dbstruct run-id) (mutex-lock! *db-transaction-mutex*) (db:with-db dbstruct #f #f (lambda (db) (let ((tr-res (sqlite3:with-transaction db (lambda () (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id)) (state-stauses (db:roll-up-rules state-status-counts #f #f )) (newstate (car state-stauses)) (newstatus (cadr state-stauses))) (db:set-run-state-status dbstruct run-id newstate newstatus )))))) (mutex-unlock! *db-transaction-mutex*) tr-res)))) (define (db:get-all-state-status-counts-for-run dbstruct run-id) (let* ((test-count-recs (db:with-db dbstruct #f #f (lambda (db) (sqlite3:map-row (lambda (state status count) (make-dbr:counts state: state status: status count: count)) db "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" run-id ))))) test-count-recs)) ;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* (define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) (item-state (or item-state-in (db:test-get-state test-info))) (item-status (or item-status-in (db:test-get-status test-info))) (other-items-count-recs (db:with-db dbstruct #f #f (lambda (db) (sqlite3:map-row |
︙ | ︙ | |||
3862 3863 3864 3865 3866 3867 3868 | (list '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") ;; TESTS '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") ;; Test state and status '(set-test-state "UPDATE tests SET state=? WHERE id=?;") '(set-test-status "UPDATE tests SET state=? WHERE id=?;") | | | 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 | (list '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") ;; TESTS '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") ;; Test state and status '(set-test-state "UPDATE tests SET state=? WHERE id=?;") '(set-test-status "UPDATE tests SET state=? WHERE id=?;") '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE ;; Test comment '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;") ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps '(test_data-pf-rollup "UPDATE tests |
︙ | ︙ |
Modified launch.scm from [9077b3d3fc] to [b89d82dc9d].
︙ | ︙ | |||
779 780 781 782 783 784 785 | (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) ;; only state and status needed - use lazy routine (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; Am I completed? (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status | < | | 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 | (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) ;; only state and status needed - use lazy routine (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; Am I completed? (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test ) (new-status (cond ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) |
︙ | ︙ | |||
811 812 813 814 815 816 817 | )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) (mutex-unlock! m) | < | < < > > > > > > > > > > > > > | > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > | > > | > > > > > > > > > > > > > > > > > > > > > > | 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 | )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) (mutex-unlock! m) (launch:end-of-run-check run-id ) (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") (if (not (launch:einf-exit-status exit-info)) (exit 4)))) ))) ;; Spec for End of test ;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup ;; At transition to run COMPLETED/X do hooks ;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND ;; we can prove the process is not alive (ssh host pstree -A pid) ;; if dead safe to mark the test as killed in the db ;; State/status table ;; new ;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup ;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na ;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED ;; 0 RUNNING ==> this is actually the first condition, should not get here (define (launch:end-of-run-check run-id ) (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))) ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing (cond ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) (debug:print 0 *default-log-port* "rollup run state/status") (rmt:set-state-status-and-roll-up-run run-id) (debug:print 0 *default-log-port* "look for post hook.") (runs:run-post-hook run-id)) ((> running-cnt 3) (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) ((> running-cnt 0) (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) (let ((kill-cnt (launch:kill-tests-if-dead run-id))) (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt)) (launch:end-of-run-check run-id)))) ;;todo (else (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt) (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) (let loop ((running-test (car running-tests)) (tal (cdr running-tests))) (let* ((test-name (vector-ref running-test 2)) (item-path (vector-ref running-test 11))) (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") (if (not (null? tal)) (loop (car tal) (cdr tal)))))))))) (define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) (let* ((cmd (conc "ssh " host " pstree -A " pid)) (output (with-input-from-pipe cmd read-lines))) (print "cmd: " cmd "\n op: " output ) (if(eq? (length output) 0) #f #t)) #t)) (define (launch:kill-tests-if-dead run-id) (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) (let loop ((running-test (car running-tests)) (tal (cdr running-tests)) (kill-cnt 0)) (let* ((test-name (vector-ref running-test 2)) (item-path (vector-ref running-test 11)) (test-id (vector-ref running-test 0)) (host (vector-ref running-test 6)) (pid (rmt:test-get-top-process-pid run-id test-id)) (event-time (vector-ref running-test 5)) (duration (vector-ref running-test 12)) (flag 0) (curr-time (current-seconds))) (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed (begin (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") (set! flag 1) (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) (if (not (null? tal)) (loop (car tal) (cdr tal) (+ kill-cnt flag)) (+ kill-cnt flag)))))) ;; DO NOT USE - caching of configs is handled in launch:setup now. ;; (define (launch:cache-config) ;; if we have a linktree and -runtests and -target and the directory exists dump the config ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg (if (and *configdat* |
︙ | ︙ |
Modified rmt.scm from [0a05f35135] to [f3cb7b6857].
︙ | ︙ | |||
669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 | ;; (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) (define (rmt:get-count-tests-running-for-run-id run-id) (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) ;; Statistical queries (define (rmt:get-count-tests-running run-id) (rmt:send-receive 'get-count-tests-running run-id (list run-id))) (define (rmt:get-count-tests-running-for-testname run-id testname) (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) ;; state and status are extra hints not usually used in the calculation ;; (define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) (define (rmt:update-pass-fail-counts run-id test-name) (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) (define (rmt:top-test-set-per-pf-counts run-id test-name) (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) | > > > > > > > > | 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 | ;; (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) (define (rmt:get-count-tests-running-for-run-id run-id) (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) (define (rmt:get-not-completed-cnt run-id) (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) ;; Statistical queries (define (rmt:get-count-tests-running run-id) (rmt:send-receive 'get-count-tests-running run-id (list run-id))) (define (rmt:get-count-tests-running-for-testname run-id testname) (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) ;; state and status are extra hints not usually used in the calculation ;; (define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) (define (rmt:set-state-status-and-roll-up-run run-id) (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id))) (define (rmt:update-pass-fail-counts run-id test-name) (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) (define (rmt:top-test-set-per-pf-counts run-id test-name) (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) |
︙ | ︙ |
Modified runs.scm from [0d98a3ef41] to [f0db5bffd4].
︙ | ︙ | |||
511 512 513 514 515 516 517 | ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) | | > | 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 | ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) ;; mark all test launced flag as false in the meta table (rmt:set-var (conc "lunch-complete-" run-id) "no") ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data ;; ;; What happended, this code is now duplicated in tests!? ;; ;;====================================================================== |
︙ | ︙ | |||
665 666 667 668 669 670 671 | (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here | | | | | | | | 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 | (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here ;(if (eq? run-count 0) ; (begin ; (debug:print-info 0 *default-log-port* "Calling Post Hook") ; (debug:print-info 2 *default-log-port* " run-count " run-count) ; (runs:run-post-hook run-id)) ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) (rmt:tasks-set-state-given-param-key task-key "done") ;; (sqlite3:finalize! tasks-db) )) ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. ;; |
︙ | ︙ | |||
1638 1639 1640 1641 1642 1643 1644 | (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") (debug:print-info 0 *default-log-port* "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names | > > | | 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 | (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") (debug:print-info 0 *default-log-port* "Have leftovers!") (loop (car reg)(cdr reg) '() reruns)) (else (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; end loop on sorted test names ;; this is the point where everything is launced and now you can mark the run in metadata table as all launced (rmt:set-var (conc "lunch-complete-" run-id) "yes") ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") |
︙ | ︙ | |||
2471 2472 2473 2474 2475 2476 2477 | (define (runs:handle-locking target keys runname lock unlock user) (let* ((db #f) (rundat (mt:get-runs-by-patt keys runname target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1))) (for-each (lambda (run) | | > > > > | | > > | 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 | (define (runs:handle-locking target keys runname lock unlock user) (let* ((db #f) (rundat (mt:get-runs-by-patt keys runname target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1))) (for-each (lambda (run) (let ((run-id (db:get-value-by-header run header "id")) (str (if lock "lock" "unlock"))) (if (or lock (and unlock (or (args:get-arg "-force") (begin (print "Do you really wish to unlock run " run-id "?\n y/n: ") (equal? "y" (read-line)))))) (begin (rmt:lock/unlock-run run-id lock unlock user) (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id)) (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id)))) runs))) ;;====================================================================== ;; Rollup runs ;;====================================================================== ;; Update the test_meta table for this test |
︙ | ︙ |