Overview
Comment: | partially borked change to better deal with run queue idiosyncracies |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.55 |
Files: | files | file ages | folders |
SHA1: |
fa1ff570f21f9a88813390929939cd8e |
User & Date: | mrwellan on 2013-09-09 16:41:54 |
Other Links: | branch diff | manifest | tags |
Context
2013-09-10
| ||
00:27 | Queue ideosyncracies seem fixed. Needs more testing... check-in: 79361f5222 user: matt tags: v1.55 | |
2013-09-09
| ||
16:41 | partially borked change to better deal with run queue idiosyncracies check-in: fa1ff570f2 user: mrwellan tags: v1.55 | |
10:12 | Fixed issue with run event_time being reset when test was rerun check-in: aaa8f2a3d5 user: mrwellan tags: v1.55 | |
Changes
Modified dashboard.scm from [d06223a752] to [5988625d24].
︙ | ︙ | |||
220 221 222 223 224 225 226 | 'testname 'itempath))) ;; ;; trim runs to only those that are changing often here ;; (for-each (lambda (run) (let* ((run-id (db:get-value-by-header run header "id")) | | | | | | 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 | 'testname 'itempath))) ;; ;; trim runs to only those that are changing often here ;; (for-each (lambda (run) (let* ((run-id (db:get-value-by-header run header "id")) (tests (mt:get-tests-for-run run-id testnamepatt states statuses not-in: *hide-not-hide* sort-by: sort-by sort-order: sort-order)) ;; NOTE: bubble-up also sets the global *all-item-test-names* ;; (tests (bubble-up tmptests priority: bubble-type)) (key-vals (cdb:remote-run db:get-key-vals #f run-id))) ;; Not sure this is needed? (set! referenced-run-ids (cons run-id referenced-run-ids)) (if (> (length tests) maxtests) (set! maxtests (length tests))) |
︙ | ︙ | |||
1506 1507 1508 1509 1510 1511 1512 | (mutex-lock! *update-mutex*) (set! update-is-running *update-is-running*) (if (not update-is-running) (set! *update-is-running* #t)) (mutex-unlock! *update-mutex*) (if (not update-is-running) (begin | | | 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 | (mutex-lock! *update-mutex*) (set! update-is-running *update-is-running*) (if (not update-is-running) (set! *update-is-running* #t)) (mutex-unlock! *update-mutex*) (if (not update-is-running) (begin (dashboard:run-update x) (mutex-lock! *update-mutex*) (set! *update-is-running* #f) (mutex-unlock! *update-mutex*)))) 1)))) (iup:main-loop) |
Modified runs.scm from [085ac5aef0] to [de10964a2e].
︙ | ︙ | |||
134 135 136 137 138 139 140 141 142 143 144 145 146 147 | ;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine ;; (define *last-num-running-tests* 0) (define *runs:can-run-more-tests-count* 0) (define (runs:shrink-can-run-more-tests-count) ;; the db is a dummy var so we can use cdb:remote-run (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) (define (runs:can-run-more-tests jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (cdb:remote-run db:get-count-tests-running #f)) (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) | > > > > > > > > > | 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | ;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine ;; (define *last-num-running-tests* 0) (define *runs:can-run-more-tests-count* 0) (define (runs:shrink-can-run-more-tests-count) ;; the db is a dummy var so we can use cdb:remote-run (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) ;; Temporary globals. Move these into the logic or into common ;; (define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run (define (runs:inc-cant-run-tests testname) (hash-table-set! *seen-cant-run-tests* testname (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1))) (define (runs:can-keep-running? testname n) (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n)) (define (runs:can-run-more-tests jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while (else 0))) (let* ((num-running (cdb:remote-run db:get-count-tests-running #f)) (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) |
︙ | ︙ | |||
359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n can-run-more: " can-run-more) (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed))) (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (eq? testmode 'toplevel)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) (list hed tal reg reruns)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) | > | > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | > > > > > > | | > > > | 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 | "\n fails: " (runs:pretty-string fails) "\n testmode: " testmode "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) "\n (null? non-completed): " (null? non-completed) "\n reruns: " reruns "\n items: " items "\n can-run-more: " can-run-more) (cond ;; all prereqs met, fire off the test ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch ((or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed))) (debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (eq? testmode 'toplevel)(null? non-completed)))") (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) (list hed tal reg reruns)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((and (null? fails) (not (null? non-completed))) (debug:print-info 1 "no fails in prerequisites for " hed ", waiting on tests; " (string-intersperse (map (lambda (x) (if (string? x) x (runs:make-full-test-name (db:test-get-testname x) (db:test-get-item-path x)))) non-completed) ", ") ", delay launching " hed) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal ;; but also lets check that the prerequisites are all in the newtal or reruns lists ;; (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) ;; (append newtal reruns))) ;; (prereqstrs (map (lambda (x)(if (string? x) x (db:test-get-testname x))) ;; prereqs-not-met)) ;; (notinqueue (filter (lambda (x) ;; (not (member x allinqueue))) ;; prereqstrs))) ;; (if (null? notinqueue) ;; (if (runs:can-keep-running? hed 5) ;; (begin ;; (runs:inc-cant-run-tests hed) ;; (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; (begin ;; (debug:print 0 "WARNING: dropping " hed " from queue as it has prerequisites missing from the queue: " (string-intersperse notinqueue ", ")) ;; (list (runs:queue-next-hed tal reg reglen regfull) ;; (runs:queue-next-tal tal reg reglen regfull) ;; (runs:queue-next-reg tal reg reglen regfull) ;; reruns))) (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; )) ;; an issue with prereqs not yet met? ((and (null? fails) (null? non-completed)) (if (runs:can-keep-running? hed 5) (begin (runs:inc-cant-run-tests hed) (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; num-retries code was here ;; we use this opportunity to move contents of reg to tal (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? (begin (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ((and (not (null? fails))(eq? testmode 'normal)) (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (if (or (not (null? reg))(not (null? tal))) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) (cons hed reruns)) #f)) ;; #f flags do not loop (else (debug:print 1 "ERROR: No handler for this condition.") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry) (let* ((run-limits-info (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) (prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (loop-list (list hed tal reg reruns))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) (conc " WARNING: t is not a vector=" t ))) prereqs-not-met) ", ") ") fails: " fails) (if (not (null? prereqs-not-met)) (debug:print-info 1 "waiting on tests; " (string-intersperse prereqs-not-met ", "))) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (debug:print-info 4 "run-limits-info = " run-limits-info) (cond ;; Check item path against item-patts, |
︙ | ︙ | |||
442 443 444 445 446 447 448 | reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) | > > > > | | | | | | | | | | | | | | 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 | reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) (if (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs (begin (cdb:tests-register-test *runremote* run-id test-name item-path) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)) (let ((th (make-thread (lambda () (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) (mutex-unlock! registry-mutex) ;; If haven't done it before register a top level test if this is an itemized test (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) (cdb:tests-register-test *runremote* run-id test-name "")) (cdb:tests-register-test *runremote* run-id test-name item-path) (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg |
︙ | ︙ | |||
513 514 515 516 517 518 519 520 521 522 523 524 525 526 | ;; must be we have unmet prerequisites ;; (else (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. (if (null? fails) (begin ;; couldn't run, take a breather (debug:print-info 0 "Waiting for more work to do...") (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again | > > > > | 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 | ;; must be we have unmet prerequisites ;; (else (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. (if (not (null? prereqs-not-met)) (debug:print-info 1 "waiting on tests; " (string-intersperse prereqs-not-met ", "))) (if (null? fails) (begin ;; couldn't run, take a breather (debug:print-info 0 "Waiting for more work to do...") (thread-sleep! 1) (list (car newtal)(cdr newtal) reg reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again |
︙ | ︙ | |||
837 838 839 840 841 842 843 | (force (set! runflag #t)) ;; NOT_STARTED, run no matter what ((member (test:get-state testdat) '("DELETED" "NOT_STARTED"))(set! runflag #t)) ;; not -rerun and PASS, WARN or CHECK, do no run ((and (or (not rerun) keepgoing) ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK | | | 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 | (force (set! runflag #t)) ;; NOT_STARTED, run no matter what ((member (test:get-state testdat) '("DELETED" "NOT_STARTED"))(set! runflag #t)) ;; not -rerun and PASS, WARN or CHECK, do no run ((and (or (not rerun) keepgoing) ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) (member (test:get-state testdat) '("COMPLETED")))) (debug:print-info 2 "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) (hash-table-set! test-registry full-test-name 'COMPLETED) (set! runflag #f)) ;; -rerun and status is one of the specifed, run it ((and rerun (let* ((rerunlst (string-split rerun ",")) |
︙ | ︙ |
Modified tests/fullrun/tests/blocktestxz/testconfig from [ffee4ad8a2] to [ea79db52b2].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | [setup] runscript main.sh [items] THESTATE UNKNOWN INCOMPLETE KILLED KILLREQ STUCK BOZZLEBLONKED STUCK/DEAD THESTATUS PASS FAIL STUCK/DEAD SKIP [test_meta] author matt owner bob description This test will fail causing the dependent test "testxz"\ to never run. This triggers the code that must determine\ that a test will never be run and thus remove it from\ the queue of tests to be run. | > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | [setup] runscript main.sh [items] THESTATE UNKNOWN INCOMPLETE KILLED KILLREQ STUCK BOZZLEBLONKED STUCK/DEAD THESTATUS PASS FAIL STUCK/DEAD SKIP [requirements] waiton sqlitespeed [test_meta] author matt owner bob description This test will fail causing the dependent test "testxz"\ to never run. This triggers the code that must determine\ that a test will never be run and thus remove it from\ the queue of tests to be run. |
︙ | ︙ |