Megatest

Diff
Login

Differences From Artifact [ebea7e9d0d]:

To Artifact [62e275181d]:


1696
1697
1698
1699
1700
1701
1702
1703

1704
1705
1706
1707
1708
1709
1710
1696
1697
1698
1699
1700
1701
1702

1703
1704
1705
1706
1707
1708
1709
1710







-
+







                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
                (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
              (begin
                (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)
                (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))))
        db
        "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');"
        run-id remotehoststart-deadtime) ;; default time 230 seconds.
        run-id remotehoststart-deadtime) ;; default time 230 seconds

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
1730
1731
1732
1733
1734
1735
1736

1737
1738
1739
1740
1741
1742
1743
1744
1745
1746










1747
1748
1749
1750







1751
1752
1753
1754
1755






1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740







1741
1742
1743
1744
1745
1746
1747
1748
1749
1750




1751
1752
1753
1754
1755
1756
1757





1758
1759
1760
1761
1762
1763








1764
1765
1766
1767
1768
1769
1770







+



-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-







              ;;      			(or (not dbexists) ;; if no file then something wrong - mark as incomplete
              ;;      			    (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim
              ;;      		    incompleted))
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
	       ;; (launch:is-test-alive "localhost" 435)
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD")
               (for-each
                  (lambda (test-id)
                    (let* (
                      (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id))
                      (result (db:get-status-from-final-status-file run-dir)))
                    (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
                      (begin
                        (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
                        (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "PASS" "Test stopped responding but it has PASSED; marking it PASS in the DB.")
                    (let* (;; (run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id))
			   (tinfo   (db:get-test-info-by-id dbstruct run-id test-id))
			   (run-dir (db:test-get-rundir     tinfo))
			   (host    (db:test-get-host       tinfo))
			   (pid     (db:test-get-process_id tinfo))
			   (result (db:get-status-from-final-status-file run-dir)))
		      (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
			  (begin
			    (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
			    (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "PASS"
                      )
                      (begin
                      (debug:print 0 *default-log-port* "INFO: test " test-id " final state/status is not COMPLETED/PASS. It is " result)
                      (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")
								   "Test stopped responding but it has PASSED; marking it PASS in the DB."))
			  (let ((is-alive (launch:is-test-alive host pid)))
			    (if is-alive
				(debug:print 0 *default-log-port* "INFO: test " test-id " on host " host " has a process on pid " pid ", NOT setting to DEAD.")
				(begin
				  (debug:print 0 *default-log-port* "INFO: test " test-id " final state/status is not COMPLETED/PASS. It is " result)
				  (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD"
                      )
                     ) ;;call end of eud of run detection for posthook
                     (launch:end-of-run-check run-id)
                    )
                  )
									 "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
		  all-ids)
	       ;;call end of eud of run detection for posthook
	       (launch:end-of-run-check run-id)
	       )))))))

                  all-ids)
             )
         )
       )
     )
   )
 )
)

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 
;; 		   (string-intersperse (map conc all-ids) ",")