1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
|
#f
#t)))))
;; given a launch delay (minimum time from last launch) return amount of time to wait
;;
;; (define (db:launch-delay-left dbstruct run-id launch-delay)
;; select end_time-now from
;; (select testname,item_path,event_time+run_duration as
;; end_time,strftime('%s','now') as now from tests where state in
;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
(let* ((incompleted '())
(oldlaunched '())
(toplevels '())
(deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime")))
(server-start-allowance 200)
(server-overloaded-budget 200)
(launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30))
(launch-monitor-on-time-budget 30)
(launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
(remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
(remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
(running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
)
(db:with-db
dbstruct #f #f
(lambda (db)
;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
;;
;; HOWEVER: this code in run:test seems to work fine
;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
|
#f
#t)))))
;; given a launch delay (minimum time from last launch) return amount of time to wait
;;
;; (define (db:launch-delay-left dbstruct run-id launch-delay)
(define (db:get-status-from-final-status-file run-dir)
(let (
(infile (conc run-dir "/.final-status")))
;; first verify we are able to write the output file
(if (not (file-read-access? infile))
(begin
(debug:print 0 *default-log-port* "ERROR: cannot read " infile)
(debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir)
#f
)
(with-input-from-file infile read-lines)
)
)
)
;; select end_time-now from
;; (select testname,item_path,event_time+run_duration as
;; end_time,strftime('%s','now') as now from tests where state in
;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
(let* ((incompleted '())
(oldlaunched '())
(toplevels '())
;; The default running-deadtime is 720 seconds = 12 minutes.
;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
(deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime")))
(server-start-allowance 200)
(server-overloaded-budget 200)
(launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30))
(launch-monitor-on-time-budget 30)
(launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
(remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
(remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
(running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
)
(debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime)
(debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim)
(db:with-db
dbstruct #f #f
(lambda (db)
;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
;;
;; HOWEVER: this code in run:test seems to work fine
;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
|
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
|
(begin
(set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
(debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
(begin
(set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
(debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration))))
db
"SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');"
run-id running-deadtime)
(sqlite3:for-each-row
(lambda (test-id run-dir uname testname item-path event-time run-duration)
(if (and (equal? uname "n/a")
(equal? item-path "")) ;; this is a toplevel test
;; what to do with toplevel? call rollup?
(begin
(set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
(debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
(begin
(debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)
(set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))))
db
"SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');"
run-id remotehoststart-deadtime)
;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
;;
;; (db:delay-if-busy dbdat)
(sqlite3:for-each-row
(lambda (test-id run-dir uname testname item-path)
(if (and (equal? uname "n/a")
|
>
|
|
|
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
|
(begin
(set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
(debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
(begin
(set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
(debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration))))
db
"SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');"
run-id running-deadtime) ;; default time 720 seconds
(sqlite3:for-each-row
(lambda (test-id run-dir uname testname item-path event-time run-duration)
(if (and (equal? uname "n/a")
(equal? item-path "")) ;; this is a toplevel test
;; what to do with toplevel? call rollup?
(begin
(set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
(debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
(begin
(debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)
(set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))))
db
"SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');"
run-id remotehoststart-deadtime) ;; default time 230 seconds.
;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
;;
;; (db:delay-if-busy dbdat)
(sqlite3:for-each-row
(lambda (test-id run-dir uname testname item-path)
(if (and (equal? uname "n/a")
|
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
|
;; incompleted))
(min-incompleted-ids (map car incompleted)) ;; do 'em all
(all-ids (append min-incompleted-ids (map car oldlaunched))))
(if (> (length all-ids) 0)
(begin
(debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD")
(for-each
(lambda (test-id)
(db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))
;;(db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) ;; fix for one aspect of Randy's ticket 1405717332 ;; TODO - fix problem where test goes to COMPLETED/DEAD while in progress, only later to go to COMPLETED/PASS. ref ticket 220546828
all-ids))))))))
;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; (sqlite3:execute
;; db
;; (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN ("
;; (string-intersperse (map conc all-ids) ",")
|
|
>
>
>
>
>
>
|
>
>
>
|
>
>
>
>
|
>
>
>
>
>
>
>
|
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
|
;; incompleted))
(min-incompleted-ids (map car incompleted)) ;; do 'em all
(all-ids (append min-incompleted-ids (map car oldlaunched))))
(if (> (length all-ids) 0)
(begin
(debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD")
(for-each
(lambda (test-id)
(let* (
(run-dir (db:test-get-rundir-from-test-id dbstruct run-id test-id))
(result (db:get-status-from-final-status-file run-dir)))
(if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result)))
(begin
(debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
(db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "PASS" "Test stopped responding but it has PASSED; marking it PASS in the DB.")
)
(begin
(debug:print 0 *default-log-port* "INFO: test " test-id " final state/status is not COMPLETED/PASS. It is " result)
(db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")
)
)
)
)
all-ids)
)
)
)
)
)
)
)
;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; (sqlite3:execute
;; db
;; (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN ("
;; (string-intersperse (map conc all-ids) ",")
|
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
|
(lambda (key)
(set! res (cons key res)))
db
"SELECT fieldname FROM keys ORDER BY id DESC;")))
(set! *db-keys* res)
res)))
;; look up values in a header/data structure
(define (db:get-value-by-header row header field)
(if (or (null? header) (not row))
#f
(let loop ((hed (car header))
(tal (cdr header))
(n 0))
|
>
>
>
>
|
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
|
(lambda (key)
(set! res (cons key res)))
db
"SELECT fieldname FROM keys ORDER BY id DESC;")))
(set! *db-keys* res)
res)))
;; extract index number given a header/data structure
(define (db:get-index-by-header header field)
(list-index (lambda (x)(equal? x field)) header))
;; look up values in a header/data structure
(define (db:get-value-by-header row header field)
(if (or (null? header) (not row))
#f
(let loop ((hed (car header))
(tal (cdr header))
(n 0))
|
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
|
(define-record simple-run target id runname state status owner event_time)
(define-record-printer (simple-run x out)
(fprintf out "#,(simple-run ~S ~S ~S ~S)"
(simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) ))))
;; simple get-runs
;;
(define (db:simple-get-runs dbstruct runpatt count offset target)
(let* ((res '())
(keys (db:get-keys dbstruct))
(runpattstr (db:patt->like "runname" runpatt))
(remfields (list "id" "runname" "state" "status" "owner" "event_time"))
(targstr (string-intersperse keys "||'/'||"))
(keystr (conc targstr " AS target,"
(string-intersperse remfields ",")))
(qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? "
;; Generate: " AND x LIKE 'keypatt' ..."
" AND target LIKE '" target "'"
" AND state != 'deleted' ORDER BY event_time DESC "
(if (number? count)
(conc " LIMIT " count)
"")
(if (number? offset)
(conc " OFFSET " offset)
""))))
(debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
(db:with-db dbstruct #f #f
(lambda (db)
(sqlite3:for-each-row
(lambda (target id runname state status owner event_time)
(set! res (cons (make-simple-run target id runname state status owner event_time) res)))
db
|
|
|
>
>
>
>
|
>
|
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
|
(define-record simple-run target id runname state status owner event_time)
(define-record-printer (simple-run x out)
(fprintf out "#,(simple-run ~S ~S ~S ~S)"
(simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) ))))
;; simple get-runs
;;
(define (db:simple-get-runs dbstruct runpatt count offset target last-update)
(let* ((res '())
(keys (db:get-keys dbstruct))
(runpattstr (db:patt->like "runname" runpatt))
(remfields (list "id" "runname" "state" "status" "owner" "event_time"))
(targstr (string-intersperse keys "||'/'||"))
(keystr (conc targstr " AS target,"
(string-intersperse remfields ",")))
(qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? "
;; Generate: " AND x LIKE 'keypatt' ..."
" AND target LIKE '" target "'"
" AND state != 'deleted' "
(if (number? last-update)
(conc " AND last_update >= " last-update)
"")
" ORDER BY event_time DESC "
(if (number? count)
(conc " LIMIT " count)
"")
(if (number? offset)
(conc " OFFSET " offset)
"")))
)
(debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
(db:with-db dbstruct #f #f
(lambda (db)
(sqlite3:for-each-row
(lambda (target id runname state status owner event_time)
(set! res (cons (make-simple-run target id runname state status owner event_time) res)))
db
|
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
|
(vector-ref inrec 2) ;; testname
(vector-ref inrec 4) ;; state
(vector-ref inrec 5) ;; status
-1 "" -1 -1 "" "-"
(vector-ref inrec 3) ;; item-path
-1 "-" "-"))
(define (db:get-tests-for-run-state-status dbstruct run-id testpatt)
(let* ((res '())
(tests-match-qry (tests:match->sqlqry testpatt))
(qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? "
(if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))))
(debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry)
(db:with-db dbstruct run-id #f
(lambda (db)
(sqlite3:for-each-row
(lambda (id testname item-path state status)
;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
(set! res (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)))
db
qry
run-id)))
res))
(define (db:get-testinfo-state-status dbstruct run-id test-id)
(let ((res #f))
(db:with-db dbstruct run-id #f
(lambda (db)
(sqlite3:for-each-row
(lambda (run-id testname item-path state status)
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
|
(vector-ref inrec 2) ;; testname
(vector-ref inrec 4) ;; state
(vector-ref inrec 5) ;; status
-1 "" -1 -1 "" "-"
(vector-ref inrec 3) ;; item-path
-1 "-" "-"))
#;(define (db:get-tests-for-run-state-status dbstruct run-id testpatt)
(let* ((res '())
(tests-match-qry (tests:match->sqlqry testpatt))
(qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? "
(if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))))
(debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry)
(db:with-db dbstruct run-id #f
(lambda (db)
(sqlite3:for-each-row
(lambda (id testname item-path state status)
;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
(set! res (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res)))
db
qry
run-id)))
res))
(define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0))
(let* ((res '())
(tests-match-qry (tests:match->sqlqry testpatt))
(qry (conc "SELECT id,testname,item_path,state,status,event_time,run_duration FROM tests WHERE run_id=? "
" AND last_update > ? "
(if tests-match-qry (conc " AND (" tests-match-qry ") ") "")
)))
(debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry)
(db:with-db dbstruct run-id #f
(lambda (db)
(sqlite3:fold-row
(lambda (res id testname item-path state status event-time run-duration)
;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
(cons (vector id run-id testname state status event-time "" -1 -1 "" "-" item-path run-duration "-" "-") res))
'()
db
qry
run-id
(or last-update 0))))
res))
(define (db:get-testinfo-state-status dbstruct run-id test-id)
(let ((res #f))
(db:with-db dbstruct run-id #f
(lambda (db)
(sqlite3:for-each-row
(lambda (run-id testname item-path state status)
|