Overview
Comment: | Disconnecting tests from servers where possible to save connections. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.80-disconnect-tests |
Files: | files | file ages | folders |
SHA1: |
cdac5f3909ea7285c72e632a05f7bb22 |
User & Date: | matt on 2023-01-15 18:27:13 |
Other Links: | branch diff | manifest | tags |
Context
2023-01-15
| ||
21:45 | Appear to have fixed the growing connections issue check-in: f78750d901 user: matt tags: v1.80-disconnect-tests | |
18:27 | Disconnecting tests from servers where possible to save connections. check-in: cdac5f3909 user: matt tags: v1.80-disconnect-tests | |
2023-01-13
| ||
08:22 | Yet another db in transaction problem fixed. check-in: 95b6039c92 user: matt tags: v1.80 | |
Changes
Modified db.scm from [1553c753c1] to [e71eb01cf3].
︙ | ︙ | |||
90 91 92 93 94 95 96 | (db:hoh-set! dat key1 key2 val))))) (define (db:hoh-get dat key1 key2) (let* ((subhash (hash-table-ref/default dat key1 #f))) (and subhash (hash-table-ref/default subhash key2 #f)))) | | | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | (db:hoh-set! dat key1 key2 val))))) (define (db:hoh-get dat key1 key2) (let* ((subhash (hash-table-ref/default dat key1 #f))) (and subhash (hash-table-ref/default subhash key2 #f)))) (define (db:get-cache-stmth dbdat db stmt) (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id)) (stmt-cache (dbr:dbdat-stmt-cache dbdat)) (stmth (db:hoh-get stmt-cache db stmt))) (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) (db:hoh-set! stmt-cache db stmt newstmth) newstmth)))) |
︙ | ︙ | |||
1111 1112 1113 1114 1115 1116 1117 | (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth1 (db:get-cache-stmth | | | | | 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 | (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth1 (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');")) (stmth2 (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');")) (stmth3 (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"))) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) |
︙ | ︙ | |||
1357 1358 1359 1360 1361 1362 1363 | (let* ((res #f)) (db:with-db dbstruct #f #f ;; for the moment vars are only stored in main.db (lambda (dbdat db) (sqlite3:for-each-row (lambda (val) (set! res val)) | < | | 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 | (let* ((res #f)) (db:with-db dbstruct #f #f ;; for the moment vars are only stored in main.db (lambda (dbdat db) (sqlite3:for-each-row (lambda (val) (set! res val)) (db:get-cache-stmth dbdat db "SELECT val FROM metadat WHERE var=?;") var) ;; convert to number if can (if (string? res) (let ((valnum (string->number res))) (if valnum (set! res valnum)))) res)))) (define (db:inc-var dbstruct var) |
︙ | ︙ | |||
1390 1391 1392 1393 1394 1395 1396 | ;; (begin ;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) ;; (set! *last-global-delta-printed* *global-delta*))) (define (db:set-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) | | > | 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 | ;; (begin ;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) ;; (set! *last-global-delta-printed* *global-delta*))) (define (db:set-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") var val)))) (define (db:add-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var)))) (define (db:del-var dbstruct var) |
︙ | ︙ | |||
1812 1813 1814 1815 1816 1817 1818 | dbstruct #f #f (lambda (dbdat db) ;; remove previous data | | | | | | 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 | dbstruct #f #f (lambda (dbdat db) ;; remove previous data (let* ((stmt1 (db:get-cache-stmth dbdat db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) (stmt2 (db:get-cache-stmth dbdat db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) (res (sqlite3:with-transaction db (lambda () (for-each (lambda (dat) (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) (apply sqlite3:execute stmt2 run-id dat)) stats))))) ;; (sqlite3:finalize! stmt1) ;; (sqlite3:finalize! stmt2) ;; (mutex-unlock! *db-transaction-mutex*) res)))) (define (db:get-main-run-stats dbstruct run-id) (db:with-db dbstruct #f ;; this data comes from main |
︙ | ︙ | |||
2430 2431 2432 2433 2434 2435 2436 | ;; NOTE: run-id is not used ;; ;; (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) (db:with-db dbstruct run-id #f (lambda (dbdat db) | | > | | | | | | | | 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 | ;; NOTE: run-id is not used ;; ;; (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) (db:with-db dbstruct run-id #f (lambda (dbdat db) (db:test-set-state-status-db dbdat db run-id test-id newstate newstatus newcomment)))) ;; dbdat needed for cached prepared statements (define (db:test-set-state-status-db dbdat db run-id test-id newstate newstatus newcomment) (cond ((and newstate newstatus newcomment) (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) test-id)) ((and newstate newstatus) (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET state=?,status=? WHERE id=?;") newstate newstatus test-id)) (else (if newstate (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET state=? WHERE id=?;") newstate test-id)) (if newstatus (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET status=? WHERE id=?;") newstatus test-id)) (if newcomment (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET comment=? WHERE id=?;") newcomment ;; (sdb:qry 'getid newcomment) test-id)))) ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function ) ;; NEW BEHAVIOR: Count tests running in all runs! ;; (define (db:get-count-tests-running dbstruct run-id) ;; fastmode) (let* ((qry ;; (if fastmode ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;" "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');")) ;; ) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth)))))) ;; NEW BEHAVIOR: Count tests running in only one run! ;; (define (db:get-count-tests-actually-running dbstruct run-id) (db:with-db dbstruct |
︙ | ︙ | |||
2488 2489 2490 2491 2492 2493 2494 | ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;" "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; ) (db:with-db dbstruct run-id #f (lambda (dbdat db) | | | | 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 | ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;" "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; ) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth run-id)))))) ;; For a given testname how many items are running? Used to determine ;; probability for regenerating html ;; (define (db:get-count-tests-running-for-testname dbstruct run-id testname) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;") (stmth (db:get-cache-stmth dbdat db stmt))) (sqlite3:first-result stmth run-id testname))))) (define (db:get-not-completed-cnt dbstruct run-id) (db:with-db dbstruct run-id |
︙ | ︙ | |||
2797 2798 2799 2800 2801 2802 2803 | (define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) (db:with-db dbstruct run-id #t (lambda (dbdat db) (sqlite3:execute | | | | 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 | (define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) (db:with-db dbstruct run-id #t (lambda (dbdat db) (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);") test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) (define (db:delete-steps-for-test! dbstruct run-id test-id) |
︙ | ︙ | |||
2875 2876 2877 2878 2879 2880 2881 | (define (db:get-data-info-by-id dbstruct run-id test-data-id) (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; (db:with-db dbstruct run-id #f (lambda (dbdat db) | | | 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 | (define (db:get-data-info-by-id dbstruct run-id test-data-id) (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth (db:get-cache-stmth dbdat db stmt)) (res (sqlite3:fold-row (lambda (res id test-id category variable value expected tol units comment status type last-update) (vector id test-id category variable value expected tol units comment status type last-update)) (vector #f #f #f #f #f #f #f #f #f #f #f #f) stmth test-data-id))) res))))) |
︙ | ︙ | |||
3223 3224 3225 3226 3227 3228 3229 | dbstruct run-id #f (lambda (dbdat db) (let ((tr-res (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction | | | | 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 | dbstruct run-id #f (lambda (dbdat db) (let ((tr-res (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction (db:test-set-state-status-db dbdat db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test (state-statuses (db:roll-up-rules state-status-counts state status)) (newstate (car state-statuses)) (newstatus (cadr state-statuses))) (set! new-state-eh newstate) (set! new-status-eh newstatus) (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " (apply conc (map (lambda (x) (conc (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) state-status-counts))); end debug:print (if tl-test-id (db:test-set-state-status-db dbdat db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct )))))) (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) (if new-state-eh ;; moved from db:test-set-state-status (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh)) tr-res))))) |
︙ | ︙ | |||
3585 3586 3587 3588 3589 3590 3591 | (string->symbol stmtname) stmtname) db:queries))) (if q (car q) #f)))) (db:with-db dbstruct run-id #f (lambda (dbdat db) | | | 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 | (string->symbol stmtname) stmtname) db:queries))) (if q (car q) #f)))) (db:with-db dbstruct run-id #f (lambda (dbdat db) (apply sqlite3:execute (db:get-cache-stmth dbdat db query) params) #t)))) ;; get a summary of state and status counts to calculate a rollup ;; (define (db:get-state-status-summary dbstruct run-id testname) (let ((res '())) (db:with-db |
︙ | ︙ |
Modified launch.scm from [9881087e2c] to [fc0040c21d].
︙ | ︙ | |||
220 221 222 223 224 225 226 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) | < | | | | < | | | < | < | 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) (let* ((over-time (> (current-seconds) (+ last-sync update-period))) (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) (if (> delta 0.1) ;; don't bother updating with small changes load #f))) (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds (get-df (current-directory)) disk-free)) (delta (abs (- df disk-free)))) (if (and (> df 0) (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg df #f))) (do-sync (or new-cpu-load new-disk-free over-time)) ;; instead of looking for KILLREQ we are looking for a file KILLREQUEST - see tests.scm test-get-kill-request ;; (test-info (rmt:get-test-info-by-id run-id test-id)) ;; (state (db:test-get-state test-info)) ;; (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) (cond ((test-get-kill-request run-id test-id) (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) (set! kill-job? #t)) #;((equal? status "DEAD") (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING (set! kill-job? #f))) (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) (launch:handle-zombie-tests run-id) (when do-sync (with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) ;; (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) ) (if kill-job? (begin (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this |
︙ | ︙ |
Modified tests.scm from [5c2006972a] to [20e6047a42].
︙ | ︙ | |||
1964 1965 1966 1967 1968 1969 1970 | ;;====================================================================== ;; test steps ;;====================================================================== ;; teststep-set-status! used to be here (define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) | > > | | 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 | ;;====================================================================== ;; test steps ;;====================================================================== ;; teststep-set-status! used to be here (define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) (or (file-exists? (conc (getenv "MT_TEST_RUN_DIR")"/KILLREQUEST")) (file-exists? (conc (getenv "MT_LINKTREE")"/"(getenv "MT_TARGET")"/"(getenv "MT_RUNNAME")"/KILLREQUEST"))) #;(let* ((testdat (rmt:get-test-info-by-id run-id test-id))) (and testdat (equal? (test:get-state testdat) "KILLREQ")))) (define (test:tdb-get-rundat-count tdb) (if tdb (let ((res 0)) (sqlite3:for-each-row |
︙ | ︙ |