Overview
Comment: | merge-sync'd with test-rundat2 in prep for merging rundat changes. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65 |
Files: | files | file ages | folders |
SHA1: |
e0ddc9d3c5025d9a4500e119b76631d5 |
User & Date: | matt on 2020-10-23 23:05:39 |
Other Links: | branch diff | manifest | tags |
Context
2020-10-24
| ||
23:23 | Meld'd more of testdat check-in: 426aad0a60 user: matt tags: v1.65 | |
2020-10-23
| ||
23:05 | merge-sync'd with test-rundat2 in prep for merging rundat changes. check-in: e0ddc9d3c5 user: matt tags: v1.65 | |
16:00 | fixed indentation check-in: 770f557677 user: pjhatwal tags: v1.65 | |
Changes
Modified TODO from [da5eae4898] to [dcd0f52bc7].
︙ | ︙ | |||
14 15 16 17 18 19 20 21 22 23 24 25 26 27 | # # You should have received a copy of the GNU General Public License # along with Megatest. If not, see <http://www.gnu.org/licenses/>. TODO ==== WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling WW16 . split db into megatest.db (runs etc.) db/<something>.db . release basic newview implementation | > > > > > > > | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | # # You should have received a copy of the GNU General Public License # along with Megatest. If not, see <http://www.gnu.org/licenses/>. TODO ==== WW38 . Add test_rundat to no-sync ==> correction, put in <testdir>/.meta/test-run.dat . Add STATE/STATUS transitions to .meta/test-run.dat or similar . Swizzle update-test-rundat to operate on no-sync . Swizzle update-run-duration, -uname-host and cpuload-diskfree to no-sync . On state/status change update tests table with duration WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling WW16 . split db into megatest.db (runs etc.) db/<something>.db . release basic newview implementation |
︙ | ︙ |
Modified db.scm from [132e6f02ad] to [d7c10c5a58].
︙ | ︙ | |||
3465 3466 3467 3468 3469 3470 3471 | (let* ((run-ids (db:get-all-run-ids mtdb))) (for-each (lambda (run-id) (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) run-ids))) | | | 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 | (let* ((run-ids (db:get-all-run-ids mtdb))) (for-each (lambda (run-id) (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) run-ids))) ;; Get test data using test_id, run-id is not used - but it will be! ;; (define (db:get-test-info-by-id dbstruct run-id test-id) (db:with-db dbstruct #f ;; run-id #f (lambda (db) |
︙ | ︙ |
Modified launch.scm from [fd65f376ed] to [fef9dd9e8e].
︙ | ︙ | |||
209 210 211 212 213 214 215 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) | | > > | 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 | ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - ;; top of loop encountered at "(current-seconds)" with ;; last-sync="last-sync)) (let* ((over-time (> (current-seconds) (+ last-sync update-period))) (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) (if (> delta 0.1) ;; don't bother updating with small changes load #f))) (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds |
︙ | ︙ | |||
231 232 233 234 235 236 237 | (do-sync (or new-cpu-load new-disk-free over-time)) (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) | | | 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | (do-sync (or new-cpu-load new-disk-free over-time)) (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond ((test-get-kill-request run-id test-id) (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) (set! kill-job? #t)) |
︙ | ︙ | |||
463 464 465 466 467 468 469 470 471 472 473 474 475 476 | (db:test-get-host test-info) (begin (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) (test-pid (db:test-get-process_id test-info))) (cond ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:general-call 'set-test-start-time #f test-id) (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) ) ;; prime it for running | > > > | 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 | (db:test-get-host test-info) (begin (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) (test-pid (db:test-get-process_id test-info))) (cond ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. ;;((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun ;; (and (equal? (db:test-get-state test-info) "COMPLETED") ;; completed/abort => rerun if asked ;; (member (db:test-get-status test-info) '("ABORT")))) ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:general-call 'set-test-start-time #f test-id) (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) ) ;; prime it for running |
︙ | ︙ |
Modified megatest.scm from [16327ccad9] to [9752b7fe00].
︙ | ︙ | |||
521 522 523 524 525 526 527 | "-show-runconfig" "-show-config" "-show-cmdinfo" "-cleanup-db")) (no-watchdog-args-vals (filter (lambda (x) x) (map args:get-arg no-watchdog-args))) (start-watchdog (null? no-watchdog-args-vals))) | | | 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 | "-show-runconfig" "-show-config" "-show-cmdinfo" "-cleanup-db")) (no-watchdog-args-vals (filter (lambda (x) x) (map args:get-arg no-watchdog-args))) (start-watchdog (null? no-watchdog-args-vals))) ;;(print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog) ;;(BB> "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals) (if start-watchdog (thread-start! *watchdog*))) ;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions (define (open-logfile logpath-in) |
︙ | ︙ |
Modified rmt.scm from [58a6da9ea5] to [2716a38de0].
︙ | ︙ | |||
544 545 546 547 548 549 550 | ;; Just some syntatic sugar (define (rmt:register-test run-id test-name item-path) (rmt:general-call 'register-test run-id run-id test-name item-path)) (define (rmt:get-test-id run-id testname item-path) (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) | | | 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 | ;; Just some syntatic sugar (define (rmt:register-test run-id test-name item-path) (rmt:general-call 'register-test run-id run-id test-name item-path)) (define (rmt:get-test-id run-id testname item-path) (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) ;; run-id is NOT used - but it will be! ;; (define (rmt:get-test-info-by-id run-id test-id) (if (number? test-id) (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) (begin (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) (print-call-chain (current-error-port)) |
︙ | ︙ |
Modified server.scm from [3f245d9ff9] to [fc85e5121d].
︙ | ︙ | |||
324 325 326 327 328 329 330 | ;; wait for server=start-last to be three seconds old ;; (define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last")) ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) (server-key (conc (get-host-name) "-" (current-process-id)))) | | | 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | ;; wait for server=start-last to be three seconds old ;; (define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last")) ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) (server-key (conc (get-host-name) "-" (current-process-id)))) ;; (thread-sleep! (/ (random 500) 1000)) ;; I don't think this made a difference (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (delta (- (current-seconds) fmodtime)) (all-go (> delta reftime))) (if (and all-go (begin (with-output-to-file start-flag |
︙ | ︙ |
Modified tasks.scm from [0f134c8e45] to [e136a37772].
︙ | ︙ | |||
441 442 443 444 445 446 447 | ;; remove tasks given by a string of numbers comma separated (define (tasks:remove-queue-entries dbstruct task-ids) (db:with-db dbstruct #f #t (lambda (db) (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) | | | 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | ;; remove tasks given by a string of numbers comma separated (define (tasks:remove-queue-entries dbstruct task-ids) (db:with-db dbstruct #f #t (lambda (db) (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) ;; (define (tasks:process-queue dbstruct) ;; (let* ((task (tasks:snag-a-task dbstruct)) ;; (action (if task (tasks:task-get-action task) #f))) ;; (if action (print "tasks:process-queue task: " task)) ;; (if action ;; (case (string->symbol action) ;; ((run) (tasks:start-run dbstruct task)) ;; ((remove) (tasks:remove-runs dbstruct task)) |
︙ | ︙ |