Megatest

Check-in [f7c3e6325e]
Login
Overview
Comment:Added mechanism to set defunct tests to INCOMPLETE every five minutes
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: f7c3e6325ef1b4398f494d049c038c9d953d24a7
User & Date: matt on 2013-10-19 22:26:36
Other Links: branch diff | manifest | tags
Context
2013-10-21
10:37
Added run and test time display to test control panel. Added default runtimelim check-in: a4cf89e73c user: mrwellan tags: v1.55
2013-10-19
22:26
Added mechanism to set defunct tests to INCOMPLETE every five minutes check-in: f7c3e6325e user: matt tags: v1.55
2013-10-18
09:50
Fixed typo in exception handling message - found this when trying to create links on a full disk check-in: 0e8689805c user: mrwellan tags: v1.55
Changes

Modified db.scm from [1ccd181f1b] to [4356e24404].

458
459
460
461
462
463
464




























465
466
467
468
469
470
471
       (sqlite3:execute db "ALTER TABLE test_meta ADD COLUMN jobgroup TEXT DEFAULT 'default';"))
      ((< mver 1.37)
       (db:set-var db "MEGATEST_VERSION" 1.37)
       (sqlite3:execute db "ALTER TABLE tests ADD COLUMN archived INTEGER DEFAULT 0;")) 
      ((< mver megatest-version)
       (db:set-var db "MEGATEST_VERSION" megatest-version))))))





























;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
       (sqlite3:execute db "ALTER TABLE test_meta ADD COLUMN jobgroup TEXT DEFAULT 'default';"))
      ((< mver 1.37)
       (db:set-var db "MEGATEST_VERSION" 1.37)
       (sqlite3:execute db "ALTER TABLE tests ADD COLUMN archived INTEGER DEFAULT 0;")) 
      ((< mver megatest-version)
       (db:set-var db "MEGATEST_VERSION" megatest-version))))))

;;======================================================================
;; M A I N T E N A N C E
;;======================================================================

(define (db:find-and-mark-incomplete db)
  (let ((incompleted '()))
    (sqlite3:for-each-row 
     (lambda (test-id)
       (set! incompleted (cons test-id incompleted)))
     db
     "SELECT id FROM tests WHERE event_time<? AND state IN ('RUNNING','REMOTEHOSTSTART');"
     (- (current-seconds) 600)) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
    (sqlite3:for-each-row
     (lambda (test-id)
       (set! incompleted (cons test-id incompleted)))
     db
     "SELECT id FROM tests WHERE event_time<? AND state IN ('LAUNCHED');"
     (- (current-seconds)(* 60 60 24))) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
    (if (> (length incompleted) 0)
	(begin
	  (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc incompleted) ", ") " as INCOMPLETE")
	  (sqlite3:execute 
	   db
	   (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" 
		 (string-intersperse (map conc incompleted) ",")
		 ");"))))))
		     
;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
498
499
500
501
502
503
504

505
506
507
508
509
510
511
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)

    (sqlite3:execute db "VACUUM;")))

;; (define (db:report-junk-records db)


;;======================================================================
;; meta get and set vars







>







526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    (db:find-and-mark-incomplete db)
    (sqlite3:execute db "VACUUM;")))

;; (define (db:report-junk-records db)


;;======================================================================
;; meta get and set vars

Modified runs.scm from [3ff33aede5] to [31ee3d42e6].

732
733
734
735
736
737
738
739

740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757






758
759
760
761
762
763
764
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1)))


    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))







      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))







|
>


















>
>
>
>
>
>







732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (- (current-seconds) 610)))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))

      ;; Here we mark any old defunct tests as incomplete. Do this every five minutes
      (if (> (current-seconds)(+ last-time-incomplete 300))
	  (begin
	    (set! last-time-incomplete (current-seconds))
	    (cdb:remote-run db:find-and-mark-incomplete #f)))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))