Overview
Comment: | Tweaks to fix the premature test launching issues |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | test-specific-db |
Files: | files | file ages | folders |
SHA1: |
658cf0113708c0330262254687c8a5be |
User & Date: | mrwellan on 2012-09-24 17:20:06 |
Other Links: | branch diff | manifest | tags |
Context
2012-09-24
| ||
17:20 | Tweaks to fix the premature test launching issues Leaf check-in: 658cf01137 user: mrwellan tags: test-specific-db | |
00:02 | Added comment on mode check-in: eb47e2c93d user: matt tags: test-specific-db | |
Changes
Modified db.scm from [589b14ba9e] to [c259cfdf98].
︙ | ︙ | |||
948 949 950 951 952 953 954 | (sqlite3:execute db "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'), pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED')) WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name run-id test-name) | | | | | | 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 | (sqlite3:execute db "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'), pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED')) WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name run-id test-name) ;; (thread-sleep! 0.1) ;; give other processes a chance here (if (member status '("NOT_STARTED" "LAUNCHED" "RUNNING" "REMOTEHOSTSTART")) ;; running takes priority over all other states, force the test state to RUNNING (sqlite3:execute db "UPDATE tests SET state=? WHERE run_id=? AND testname=? AND item_path='';" status run-id test-name) (sqlite3:execute db "UPDATE tests SET state=CASE WHEN (SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' ELSE 'COMPLETED' END, status=CASE WHEN fail_count > 0 THEN 'FAIL' WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' ELSE 'UNKNOWN' END WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name)) #f) #f)) |
︙ | ︙ | |||
1073 1074 1075 1076 1077 1078 1079 | (if (not (eof-object? lin)) (begin (debug:print 4 lin) (rdb:csv->test-data db test-id lin) (loop (read-line))))) ;; roll up the current results. ;; FIXME: Add the status to | | | 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 | (if (not (eof-object? lin)) (begin (debug:print 4 lin) (rdb:csv->test-data db test-id lin) (loop (read-line))))) ;; roll up the current results. ;; FIXME: Add the status to (db:test-data-rollup db test-id #f)) ;; WARNING: Do NOT call this for the parent test on an iterated test ;; Roll up test_data pass/fail results ;; look at the test_data status field, ;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. ;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored (define (db:test-data-rollup db test-id status) |
︙ | ︙ | |||
1220 1221 1222 1223 1224 1225 1226 | '() (let* ((unmet-pre-reqs '()) (result '())) (for-each (lambda (waitontest-name) ;; by getting the tests with matching name we are looking only at the matching test ;; and related sub items | | | > | 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 | '() (let* ((unmet-pre-reqs '()) (result '())) (for-each (lambda (waitontest-name) ;; by getting the tests with matching name we are looking only at the matching test ;; and related sub items (let ((tests (open-run-close db:get-tests-for-run db run-id waitontest-name #f '() '())) (ever-seen #f) (parent-waiton-met #f) (item-waiton-met #f)) (for-each (lambda (test) ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by defintion this had better be true ... (let* ((state (db:test-get-state test)) (status (db:test-get-status test)) (item-path (db:test-get-item-path test)) (is-completed (equal? state "COMPLETED")) (is-ok (member status '("PASS" "WARN" "CHECK" "WAIVED"))) (same-itempath (equal? ref-item-path item-path))) (set! ever-seen #t) (cond ;; case 1, non-item (parent test) is completed and ok ((and (equal? item-path "") ;; this is the parent test is-completed (or is-ok (eq? mode 'toplevel))) (set! parent-waiton-met #t)) ;; ((and same-itempath is-completed (or is-ok (eq? mode 'toplevel))) (set! item-waiton-met #t))))) tests) (if (not (or parent-waiton-met item-waiton-met)) (set! result (append (if (null? tests) (list waitontest-name) tests) result))) |
︙ | ︙ |
Modified launch.scm from [c4ef02d1b8] to [73000cbbad].
︙ | ︙ | |||
278 279 280 281 282 283 284 | (system (conc "kill -9 " p-id)))))) (car processes)) (system (conc "kill -9 " pid)))) (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") (open-run-close test-set-status! #f test-id "KILLED" "FAIL" (args:get-arg "-m") #f) | | | 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 | (system (conc "kill -9 " p-id)))))) (car processes)) (system (conc "kill -9 " pid)))) (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") (open-run-close test-set-status! #f test-id "KILLED" "FAIL" (args:get-arg "-m") #f) ;; (sqlite3:finalize! tdb) (exit 1)))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) ;; (sqlite3:finalize! db) (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses (loop (calc-minutes))))))) (th1 (make-thread monitorjob)) |
︙ | ︙ | |||
330 331 332 333 334 335 336 | (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no ) (mutex-unlock! m) ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") | | | | 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 | (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no ) (mutex-unlock! m) ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") ;; (sqlite3:finalize! db) ;; (sqlite3:finalize! tdb) (if (not (vector-ref exit-info 1)) (exit 4))))))) ;; set up the very basics needed for doing anything here. (define (setup-for-run) ;; would set values for KEYS in the environment here for better support of env-override but ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to |
︙ | ︙ |
Modified runs.scm from [5395094d8f] to [eac0e08035].
︙ | ︙ | |||
276 277 278 279 280 281 282 283 284 285 286 287 288 289 | (if (not (null? required-tests)) (debug:print 1 "INFO: Adding " required-tests " to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (runs:run-tests-queue run-id runname test-records keyvallst flags) (debug:print 4 "INFO: All done by here"))) ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue run-id runname test-records keyvallst flags) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (item-patts (hash-table-ref/default flags "-itempatt" #f))) | > > > > > > > > > > > > > > > > > > > > > | 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | (if (not (null? required-tests)) (debug:print 1 "INFO: Adding " required-tests " to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (runs:run-tests-queue run-id runname test-records keyvallst flags) (debug:print 4 "INFO: All done by here"))) ;; testname is hed and remtests is tal, can be testname strings or testqueue vectors ;; remaining-items are other items for the current test that have not been run yet ;; this is used in calculating the state of toplevel tests. They are NOT COMPLETED ;; until all items are COMPLETED and thus not in this list. (define (runs:remaining-items testdat remtests) (let* ((testname (tests:testqueue-get-testname testdat)) ;; extract the name of the test (may have vector record) (itempath (tests:testqueue-get-itempath testdat)) (toptestname (if (string? testname) (car (string-split testname "/")) (begin (debug:print 0 "ERROR: Should have a string testname here! Please report this as a bug :(") testname)))) (filter (lambda (test) (let ((tname (tests:testqueue-get-testname test)) (ipath (tests:testqueue-get-itempath test))) (and (equal? tname testname) (and (not (equal? ipath "")) (not (equal? ipath itempath)))))) remtests))) ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue run-id runname test-records keyvallst flags) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (item-patts (hash-table-ref/default flags "-itempatt" #f))) |
︙ | ︙ | |||
333 334 335 336 337 338 339 | (begin (debug:print 0 "ERROR: test " hed " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ((not items) ;; when false the test is ok to be handed off to launch (but not before) (let* ((have-resources (open-run-close runs:can-run-more-tests #f test-record)) ;; look at the test jobgroup and tot jobs running | | | 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 | (begin (debug:print 0 "ERROR: test " hed " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ((not items) ;; when false the test is ok to be handed off to launch (but not before) (let* ((have-resources (open-run-close runs:can-run-more-tests #f test-record)) ;; look at the test jobgroup and tot jobs running (prereqs-not-met (db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) (fails (calc-fails prereqs-not-met)) (non-completed (calc-not-completed prereqs-not-met))) (debug:print 8 "INFO: have-resources: " have-resources " prereqs-not-met: " (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) |
︙ | ︙ | |||
357 358 359 360 361 362 363 | ;; no loop here, just drop though and use the loop at the bottom (if (patt-list-match item-path item-patts) (run:test run-id runname keyvallst test-record flags #f) (debug:print 1 "INFO: Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " item-patts)) ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites ) | | > > > | | | | 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 | ;; no loop here, just drop though and use the loop at the bottom (if (patt-list-match item-path item-patts) (run:test run-id runname keyvallst test-record flags #f) (debug:print 1 "INFO: Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " item-patts)) ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites ) ((not have-resources) ;; simply try again after waiting a second, but register the test ;; so the itemized tests have place holders (open-run-close tests:register-test db run-id (tests:testqueue-get-testname hed) item-path) (thread-sleep! (+ 1 *global-delta*)) (debug:print 1 "INFO: no resources to run new tests, waiting ...") ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (loop hed tal)) ;; (car newtal)(cdr newtal))) WHY DID I REORDER!!? (else ;; must be we have unmet prerequisites (debug:print 4 "FAILS: " fails) ;; If one or more of the prereqs-not-met are FAIL then we can issue ;; a message and drop hed from the items to be processed. (if (null? fails) (begin ;; couldn't run, take a breather (debug:print 4 "INFO: Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") (thread-sleep! (+ 1 *global-delta*)) ;; long sleep here - no resources, may as well be patient ;; we made new tal by sticking hed at the back of the list. BUT WHY? (loop hed tal)) ;; (car newtal)(cdr newtal))) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") (loop (car tal)(cdr tal))) (begin |
︙ | ︙ | |||
443 444 445 446 447 448 449 | (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) (loop hed tal)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) | | | 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 | (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) (loop hed tal)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) ;; AGAIN, WHY DID I TRY TO ROTATE THE TESTS HERE? (loop (car newtal)(cdr newtal))) ;; an issue with prereqs not yet met? ((and (not (null? fails))(eq? testmode 'normal)) (debug:print 1 "INFO: test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (if (not (null? tal)) (loop (car tal)(cdr tal)))) |
︙ | ︙ |
Modified test_records.scm from [9245906f33] to [4ca0c9e265].
1 2 | ;; make-vector-record tests testqueue testname testconfig waitons priority items (define (make-tests:testqueue)(make-vector 7 #f)) | > > | > | | > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | ;; make-vector-record tests testqueue testname testconfig waitons priority items (define (make-tests:testqueue)(make-vector 7 #f)) ;; modified to treat the param either as a string (pure name) or vec (testqueue record) (define-inline (tests:testqueue-get-testname vec) (if (string? vec) (car (string-split vec "/"))(vector-ref vec 0))) (define-inline (tests:testqueue-get-testconfig vec) (vector-ref vec 1)) (define-inline (tests:testqueue-get-waitons vec) (vector-ref vec 2)) (define-inline (tests:testqueue-get-priority vec) (vector-ref vec 3)) ;; items: #f=no items, list=list of items remaining, proc=need to call to get items (define-inline (tests:testqueue-get-items vec) (vector-ref vec 4)) (define-inline (tests:testqueue-get-itemdat vec) (vector-ref vec 5)) (define-inline (tests:testqueue-get-item_path vec) (if (string? vec) (let ((tmp (cdr (string-split vec "/")))) (if (null? tmp) "" (car tmp)) (vector-ref vec 6)))) (define-inline (tests:testqueue-set-testname! vec val)(vector-set! vec 0 val)) (define-inline (tests:testqueue-set-testconfig! vec val)(vector-set! vec 1 val)) (define-inline (tests:testqueue-set-waitons! vec val)(vector-set! vec 2 val)) (define-inline (tests:testqueue-set-priority! vec val)(vector-set! vec 3 val)) (define-inline (tests:testqueue-set-items! vec val)(vector-set! vec 4 val)) (define-inline (tests:testqueue-set-itemdat! vec val)(vector-set! vec 5 val)) (define-inline (tests:testqueue-set-item_path! vec val)(vector-set! vec 6 val)) |
︙ | ︙ |
Modified tests.scm from [9c29f324c9] to [fafb372d78].
︙ | ︙ | |||
369 370 371 372 373 374 375 | (waitons (tests:testqueue-get-waitons test-record)) (keep-test #t) (test-id (db:get-test-id db run-id test-name item-path)) (tdat (db:get-test-info-by-id db test-id))) (if tdat (begin ;; Look at the test state and status | < | | | > | < | | | 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 | (waitons (tests:testqueue-get-waitons test-record)) (keep-test #t) (test-id (db:get-test-id db run-id test-name item-path)) (tdat (db:get-test-info-by-id db test-id))) (if tdat (begin ;; Look at the test state and status (if (or (and (member (db:test-get-status tdat) '("PASS" "WARN" "WAIVED" "CHECK")) (equal? (db:test-get-state tdat) "COMPLETED")) (member (db:test-get-state tdat) '("INCOMPLETE" "KILLED"))) (set! keep-test #f)) ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test ;; from the runnable list (if keep-test (for-each (lambda (waiton) ;; for now we are waiting only on the parent test (let* ((parent-test-id (db:get-test-id db run-id waiton "")) (wtdat (db:get-test-info-by-id db test-id))) (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED") (member (db:test-get-status wtdat) '("FAIL"))) (member (db:test-get-status wtdat) '("KILLED")) (member (db:test-get-state wtdat) '("INCOMPETE"))) (set! keep-test #f)))) ;; no point in running this one again waitons)))) (if keep-test (set! runnables (cons testkeyname runnables))))) testkeynames) runnables)) ;;====================================================================== |
︙ | ︙ |