Overview
Comment: | Cherrypicked removal of telemetric stuff (removing all complexities for now). |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.70-defunct-try |
Files: | files | file ages | folders |
SHA1: | 400ad607f40bc08f59698695c38e70d3 |
User & Date: | mrwellan on 2019-12-12 14:32:12 |
Other Links: | branch diff | manifest | tags |
Context
2019-12-15
| ||
20:17 | Added pkts module moved from opensrc repo. check-in: 03b3e035e2 user: matt tags: v1.70-defunct-try | |
2019-12-12
| ||
14:32 | Cherrypicked removal of telemetric stuff (removing all complexities for now). check-in: 400ad607f4 user: mrwellan tags: v1.70-defunct-try | |
2019-12-11
| ||
20:20 | Moved couple procs from runs-inc into runsmod. check-in: e6296edad4 user: matt tags: v1.70-defunct-try | |
Changes
Modified api-inc.scm from [7dfa5ca729] to [d2c2cccd89].
︙ | ︙ | |||
154 155 156 157 158 159 160 | cmd-in (string->symbol cmd-in))) (params (vector-ref dat 1)) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) | | | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | cmd-in (string->symbol cmd-in))) (params (vector-ref dat 1)) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) #;(foo (begin (common:telemetry-log (conc "api-in:"(->string cmd)) payload: `((params . ,params))) #t)) (res (if writecmd-in-readonly-mode (conc "attempt to run write command "cmd" on a read-only database") |
︙ | ︙ | |||
343 344 345 346 347 348 349 | ;; save all stats (let ((delta-t (- (current-milliseconds) start-t))) (hash-table-set! *db-api-call-time* cmd (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) (if writecmd-in-readonly-mode (begin | | | | 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 | ;; save all stats (let ((delta-t (- (current-milliseconds) start-t))) (hash-table-set! *db-api-call-time* cmd (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) (if writecmd-in-readonly-mode (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #t))) (vector #f res)) (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #f))) (vector #t res)))))))) ;; http-server send-response ;; api:process-request ;; db:* |
︙ | ︙ |
Modified common-inc.scm from [b0ac284799] to [9909c0819b].
︙ | ︙ | |||
2643 2644 2645 2646 2647 2648 2649 | (handle-exceptions exn #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception (thread-join! thread)) ))) (hash-table-keys *common:thread-punchlist*))) | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < > | | | | | | | | | | | | | | | | | | | | < > | | | | | 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 | (handle-exceptions exn #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception (thread-join! thread)) ))) (hash-table-keys *common:thread-punchlist*))) ;; (define *common:telemetry-log-state* 'startup) ;; (define *common:telemetry-log-socket* #f) ;; ;; (define (common:telemetry-log-open) ;; (if (eq? *common:telemetry-log-state* 'startup) ;; (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) ;; (serverport (configf:lookup-number *configdat* "telemetry" "port")) ;; (user (or (get-environment-variable "USER") "unknown")) ;; (host (or (get-environment-variable "HOST") "unknown"))) ;; (set! *common:telemetry-log-state* ;; (handle-exceptions ;; exn ;; (begin ;; (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure") ;; 'broken) ;; (if (and serverhost serverport user host) ;; (let* ((s (udp-open-socket))) ;; ;;(udp-bind! s #f 0) ;; (udp-connect! s serverhost serverport) ;; (set! *common:telemetry-log-socket* s) ;; 'open) ;; 'not-needed)))))) ;; ;; (define (common:telemetry-log event #!key (payload '())) ;; (if (eq? *common:telemetry-log-state* 'startup) ;; (common:telemetry-log-open)) ;; ;; (if (eq? 
'open *common:telemetry-log-state*) ;; (handle-exceptions ;; exn ;; (begin ;; (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)") ;; ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose) ;; ;;(common:telemetry-log-close) ;; (define *common:telemetry-log-state* 'broken-or-no-server) ;; (set! *common:telemetry-log-socket* #f) ;; ) ;; (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events ;; (let* ((user (or (get-environment-variable "USER") "unknown")) ;; (host (or (get-environment-variable "HOST") "unknown")) ;; (start (conc "[megatest "event"]")) ;; (toppath (or *toppath* "/dev/null")) ;; (payload-serialized ;; (base64:base64-encode ;; (z3:encode-buffer ;; (with-output-to-string (lambda () (pp payload)))))) ;; (msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":" ;; toppath":"payload-serialized))) ;; (udp-send *common:telemetry-log-socket* msg)))))) ;; ;; (define (common:telemetry-log-close) ;; (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*) ;; (handle-exceptions ;; exn ;; (begin ;; (define *common:telemetry-log-state* 'closed-fail) ;; (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure") ;; ) ;; (begin ;; (define *common:telemetry-log-state* 'closed) ;; (udp-close-socket *common:telemetry-log-socket*) ;; (set! *common:telemetry-log-socket* #f))))) |
Modified launch-inc.scm from [476c5d50a8] to [1f775f156c].
︙ | ︙ | |||
365 366 367 368 369 370 371 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) | | | 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) (let* ((over-time (> (current-seconds) (+ last-sync update-period))) (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) (if (> delta 0.1) ;; don't bother updating with small changes load #f))) (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds |
︙ | ︙ | |||
387 388 389 390 391 392 393 | (do-sync (or new-cpu-load new-disk-free over-time)) (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) | | | | | 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 | (do-sync (or new-cpu-load new-disk-free over-time)) (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond ((test-get-kill-request run-id test-id) (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) (set! kill-job? #t)) ((equal? status "DEAD") (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING (set! kill-job? 
#f))) (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) (launch:handle-zombie-tests run-id) (when do-sync ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) (if kill-job? (begin (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this ;; section and the runit section? Or add a loop that tries three times with a 1/4 second |
︙ | ︙ |
Modified rmt-inc.scm from [4a6d831f46] to [15a54ab90a].
︙ | ︙ | |||
70 71 72 73 74 75 76 | (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected | | | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) ;; do all the prep locked under the rmt-mutex (mutex-lock! *rmt-mutex*) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote |
︙ | ︙ |
Modified runs-inc.scm from [cb209f098e] to [96438abf0b].
︙ | ︙ | |||
368 369 370 371 372 373 374 | (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) ));; tests will be ANDed with this list ;; register this run in monitor.db (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) (rmt:tasks-set-state-given-param-key task-key "running") | | | 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 | (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) ));; tests will be ANDed with this list ;; register this run in monitor.db (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) (rmt:tasks-set-state-given-param-key task-key "running") #;(common:telemetry-log "run-tests" payload: `( (target . ,target) (run-name . ,runname) (test-patts . ,test-patts) ) ) ;; Now generate all the tests lists |
︙ | ︙ |