Overview
Comment: | Added back the improved db:find-and-mark-incomplete routine. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.55 | v1.5519 |
Files: | files | file ages | folders |
SHA1: |
d8f671770d1aa303cac1f400c3a10c1a |
User & Date: | mrwellan on 2014-05-20 17:20:19 |
Other Links: | branch diff | manifest | tags |
Context
2014-05-20
| ||
23:54 | Properly deal with roll up vs. setting to INCOMPLETE in old tests stuck in LAUNCHED check-in: b54fa13558 user: matt tags: v1.55 | |
17:20 | Added back the improved db:find-and-mark-incomplete routine. check-in: d8f671770d user: mrwellan tags: v1.55, v1.5519 | |
01:16 | Bumped version to v1.5519 check-in: d14bc79960 user: matt tags: v1.55, v1.5519 | |
Changes
Modified db.scm from [ae5afe1970] to [e4c046d5ff].
︙ | ︙ | |||
;;======================================================================
;; Handy query for seeing how far past their expected end time the
;; in-flight tests are:
;;
;;   select end_time-now from
;;     (select testname,item_path,event_time+run_duration as end_time,
;;             strftime('%s','now') as now
;;        from tests
;;       where state in ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
;;
;; db:find-and-mark-incomplete
;;
;; Scan every run for tests that look dead and force them to INCOMPLETE.
;;
;;   db           - database handle handed to the sqlite3 calls
;;   ovr-deadtime - optional numeric override for the configured deadtime
;;
;; Steps:
;;   1. For each run, collect tests in RUNNING/REMOTEHOSTSTART whose
;;      event_time is more than 600 seconds old.  Rows with uname "n/a"
;;      and an empty item_path are treated as toplevel tests and kept
;;      on a separate list.
;;   2. For each run, collect tests in LAUNCHED whose event_time is more
;;      than 86400 seconds old.
;;   3. Of the non-toplevel candidates keep only those whose testdat.db
;;      is missing or has not been modified for more than 600 seconds,
;;      and set state='INCOMPLETE' on them with a single UPDATE.
;;   4. Call cdb:top-test-set-per-pf-counts for every toplevel found in
;;      step 1.
;;
;; NOTE(review): deadtime (from [setup] deadtime, default 7200, or from
;; ovr-deadtime) is computed but never read below; the 600 and 86400
;; second thresholds are hard coded.  Confirm whether it was meant to
;; replace one of them.
;;
(define (db:find-and-mark-incomplete db #!key (ovr-deadtime #f))
  (let* ((stale-items     '()) ;; candidate records: (id rundir uname testname item-path run-id)
         (stale-toplevels '()) ;; same record layout, toplevel tests only
         (cfg-deadtime    (configf:lookup *configdat* "setup" "deadtime"))
         (deadtime        (or (and cfg-deadtime (string->number cfg-deadtime))
                              7200)) ;; two hours
         (all-run-ids     (db:get-run-ids db)) ;; work through the db one run at a time
         (running-sql     "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 600 AND state IN ('RUNNING','REMOTEHOSTSTART');")
         (launched-sql    "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
         ;; A candidate is really dead when its testdat.db is missing
         ;; (something went wrong) or has seen no change in ten minutes.
         (testdat-stale?  (lambda (rec)
                            (let ((tdat-file (conc (list-ref rec 1) "/testdat.db")))
                              (if (file-exists? tdat-file)
                                  (> (- (current-seconds)
                                        (file-modification-time tdat-file))
                                     600)
                                  #t)))))
    (if (number? ovr-deadtime)
        (set! deadtime ovr-deadtime))

    ;; Pass 1: gather candidates, run by run.
    (let scan ((pending all-run-ids))
      (if (not (null? pending))
          (let ((run-id (car pending)))
            ;; RUNNING or REMOTEHOSTSTART for more than 10 minutes.
            ;;
            ;; Original author's caveat: this cannot be done with
            ;; run_duration because, for performance reasons, it is not
            ;; updated in the central db; testdat.db has to be consulted.
            ;; The equivalent check in run:test is reported to work:
            ;;   (> (- (current-seconds)(+ (db:test-get-event_time testdat)
            ;;                             (db:test-get-run_duration testdat)))
            ;;      600)
            (db:delay-if-busy)
            (sqlite3:for-each-row
             (lambda (test-id run-dir uname testname item-path)
               (let ((rec (list test-id run-dir uname testname item-path run-id)))
                 (cond
                  ;; a toplevel test; it gets a rollup later instead of
                  ;; being forced to INCOMPLETE
                  ((and (equal? uname "n/a") (equal? item-path ""))
                   (set! stale-toplevels (cons rec stale-toplevels))
                   (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id))
                  (else
                   (set! stale-items (cons rec stale-items))))))
             db
             running-sql
             run-id)

            ;; LAUNCHED for more than one day.  Job queues can make this
            ;; legitimately long.  TODO/BUG: needs an override in config.
            (db:delay-if-busy)
            (sqlite3:for-each-row
             (lambda (test-id run-dir uname testname item-path)
               (set! stale-items
                     (cons (list test-id run-dir uname testname item-path run-id)
                           stale-items)))
             db
             launched-sql
             run-id)

            (scan (cdr pending)))))

    ;; Pass 2: these tests are defunct, so skip the overhead of
    ;; set-state-status and write INCOMPLETE directly.
    (db:delay-if-busy)
    (let ((dead-ids (map car (filter testdat-stale? stale-items))))
      (if (not (null? dead-ids))
          (begin
            (debug:print 0 "WARNING: Marking test(s); "
                         (string-intersperse (map conc dead-ids) ", ")
                         " as INCOMPLETE")
            (sqlite3:execute
             db
             (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN ("
                   (string-intersperse (map conc dead-ids) ",")
                   ");")))))

    ;; Pass 3: roll up the toplevel tests found in pass 1.
    (for-each
     (lambda (top-rec)
       (apply (lambda (test-id run-dir uname test-name item-path run-id)
                (cdb:top-test-set-per-pf-counts *runremote* run-id test-name))
              top-rec))
     stale-toplevels)))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the test to state='UNKNOWN', Set the run to 'unknown'
︙ | ︙ |
Modified runs.scm from [9ea69b3434] to [596fc89645].
︙ | ︙ | |||
857 858 859 860 861 862 863 | (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) | | > | | | | | 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 | (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec)) (st (db:test-get-state trec))) (if (not (equal? st "DELETED")) (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st))))) tests-info) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes (if (> (current-seconds)(+ last-time-incomplete 900)) (begin (set! 
last-time-incomplete (current-seconds)) (cdb:remote-run db:find-and-mark-incomplete #f))) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) |
︙ | ︙ |