Overview
Comment: | Remove couple calls to telemetry stuff |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.6569-multi-db-wip |
Files: | files | file ages | folders |
SHA1: |
9bb722e1d36c10014bcac90b01618155 |
User & Date: | matt on 2021-02-12 21:46:01 |
Other Links: | branch diff | manifest | tags |
Context
2021-02-13
| ||
19:16 | updated couple unit tests check-in: 22975ea8da user: matt tags: v1.6569-multi-db-wip (unpublished) | |
2021-02-12
| ||
21:46 | Remove couple calls to telemetry stuff check-in: 9bb722e1d3 user: matt tags: v1.6569-multi-db-wip (unpublished) | |
20:49 | statement caching in db:get-tests-for-run-state-status was broken. check-in: 80d870f848 user: matt tags: v1.6569-multi-db-wip (unpublished) | |
Changes
Modified dbmod.scm from [2f7d92ecb5] to [fea7eb8b6f].
︙ | ︙ | |||
2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 | (vector-ref inrec 3) ;; item-path -1 "-" "-")) ;; ;; 1. cache tests-match-qry ;; 2. compile qry and store in hash ;; 3. convert for-each-row to fold ;; #;(define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0)) (db:with-db dbstruct run-id #f (lambda (db) ` (let* ((res '()) (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) | > > | 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 | (vector-ref inrec 3) ;; item-path -1 "-" "-")) ;; ;; 1. cache tests-match-qry ;; 2. compile qry and store in hash ;; 3. convert for-each-row to fold ;; ;; THERE IS A BUG IN THIS ONE, MAYBE THE HOH STUFF AIN'T WORKING? ;; #;(define (db:get-tests-for-run-state-status dbstruct run-id testpatt #!optional (last-update 0)) (db:with-db dbstruct run-id #f (lambda (db) ` (let* ((res '()) (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) |
︙ | ︙ |
Modified launch.scm from [6cf6d14015] to [7ad49f6487].
︙ | ︙ | |||
215 216 217 218 219 220 221 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) | | | 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) (let* ((over-time (> (current-seconds) (+ last-sync update-period))) (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) (if (> delta 0.1) ;; don't bother updating with small changes load #f))) (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds |
︙ | ︙ | |||
237 238 239 240 241 242 243 | (do-sync (or new-cpu-load new-disk-free over-time)) (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) | | | | | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | (do-sync (or new-cpu-load new-disk-free over-time)) (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond ((test-get-kill-request run-id test-id) (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) (set! kill-job? #t)) ((equal? status "DEAD") (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING (set! kill-job? #f))) (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) (launch:handle-zombie-tests run-id) (when do-sync ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) (if kill-job? (begin (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this ;; section and the runit section? Or add a loop that tries three times with a 1/4 second |
︙ | ︙ |