Megatest

Check-in [f97980cf8c]
Login
Overview
Comment:Blindly duplicated run logic to use in monitor based launching
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f97980cf8cbff57ab5a3dc404c321d5c0cde47b2
User & Date: matt on 2011-10-23 16:42:52
Other Links: manifest | tags
Context
2011-10-23
19:10
Monitor based runs partially working check-in: 59034f6b4d user: matt tags: trunk
16:42
Blindly duplicated run logic to use in monitor based launching check-in: f97980cf8c user: matt tags: trunk
06:02
Progress on monitor based running check-in: 3cbc9cb854 user: matt tags: trunk
Changes

Modified keys.scm from [1fd229ce93] to [99bd692b48].

67
68
69
70
71
72
73




74
75
76
77
78
79
80
81
82
83

















84
85
86
87
88
89
90

(define (args:usage . a) #f)

;; keys->vallist is called several times (quite unnecessarily), use this hash to suppress multiple
;; reporting of missing keys on the command line.
(define keys:warning-suppress-hash (make-hash-table))





;; this now invalidates using "/" in item names
(define (keys:target-set-args keys target ht)
  (let ((vals (string-split target "/")))
    (if (eq? (length vals)(length keys))
	(for-each (lambda (key val)
		    (hash-table-set! ht (conc ":" (vector-ref key 0)) val))
		  keys
		  vals)
	(debug:print 0 "ERROR: wrong number of values in " target ", should match " keys))
    vals))


















;; Using the keys pulled from the database (initially set from the megatest.config file)
;; look for the equivalent value on the command line and add it to a list, or #f if not found.
;; default => (val1 val2 val3 ...)
;; withkey => (:key1 val1 :key2 val2 :key3 val3 ...)
(define (keys->vallist keys . withkey) ;; ORDERING IS VERY IMPORTANT, KEEP PROPER ORDER HERE!
  (let* ((keynames   (map key:get-fieldname keys))







>
>
>
>










>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

(define (args:usage . a) #f)

;; keys->vallist is called several times (quite unnecessarily), use this hash to suppress multiple
;; reporting of missing keys on the command line.
(define keys:warning-suppress-hash (make-hash-table))

;;======================================================================
;; key <=> target routines
;;======================================================================

;; this now invalidates using "/" in item names
(define (keys:target-set-args keys target ht)
  (let ((vals (string-split target "/")))
    (if (eq? (length vals)(length keys))
	(for-each (lambda (key val)
		    (hash-table-set! ht (conc ":" (vector-ref key 0)) val))
		  keys
		  vals)
	(debug:print 0 "ERROR: wrong number of values in " target ", should match " keys))
    vals))

;; given the keys (a list of vectors <key field>) and a target return a keyval list
;; keyval list ( (key1 val1) (key2 val2) ...)
(define (keys:target->keyval keys target)
  (let* ((targlist (string-split target "/"))
	 (numkeys  (length keys))
	 (numtarg  (length targlist))
	 (targtweaked (if (> numkeys numtarg)
			  (append targlist (make-list (- numkeys numtarg) ""))
			  targlist)))
    (map (lambda (key targ)
	   (list (vector-ref key 0) targ))
	 keys targtweaked)))

;;======================================================================
;; key <=> args routines
;;======================================================================

;; Using the keys pulled from the database (initially set from the megatest.config file)
;; look for the equivalent value on the command line and add it to a list, or #f if not found.
;; default => (val1 val2 val3 ...)
;; withkey => (:key1 val1 :key2 val2 :key3 val3 ...)
(define (keys->vallist keys . withkey) ;; ORDERING IS VERY IMPORTANT, KEEP PROPER ORDER HERE!
  (let* ((keynames   (map key:get-fieldname keys))

Modified runs.scm from [2722d9b41e] to [fe2d069603].

535
536
537
538
539
540
541


542
543
544
545
546
547
548
	     #f ;; cannot have a which is waiting on b happening before b
	     (if (and b-waiton (equal? b-waiton a))
		 #t ;; this is the correct order, b is waiting on a and b is before a
		 (if (> a-priority b-priority)
		     #t ;; if a is a higher priority than b then we are good to go
		     #f))))))))



(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))  ;;  test-name)))
	 (deferred    '())) ;; delay running these since they have a waiton clause
    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified







>
>







535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
	     #f ;; cannot have a which is waiting on b happening before b
	     (if (and b-waiton (equal? b-waiton a))
		 #t ;; this is the correct order, b is waiting on a and b is before a
		 (if (> a-priority b-priority)
		     #t ;; if a is a higher priority than b then we are good to go
		     #f))))))))

;; This is original run-tests, this routine is deprecated and we will transition to using runs:run-tests (see below)
;;
(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))  ;;  test-name)))
	 (deferred    '())) ;; delay running these since they have a waiton clause
    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
	    (if (and (> estrem 0)
		     (eq? *globalexitstatus* 0))
		(begin
		  (debug:print 1 "Keep going, estimated " estrem " tests remaining to run, will continue in 3 seconds ...")
		  (sleep 3)
		  (run-waiting-tests db)
		  (loop (+ numtimes 1)))))))))
	   
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (debug:print 1 "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process







|







573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
	    (if (and (> estrem 0)
		     (eq? *globalexitstatus* 0))
		(begin
		  (debug:print 1 "Keep going, estimated " estrem " tests remaining to run, will continue in 3 seconds ...")
		  (sleep 3)
		  (run-waiting-tests db)
		  (loop (+ numtimes 1)))))))))
	  
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (debug:print 1 "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
766
767
768
769
770
771
772






























































































































































































































773
774
775
776
777
778
779
				((cadr testdat))
				(hash-table-delete! *waiting-queue* testname)))
			  (if (not db)
			      (sqlite3:finalize! ldb)))))
		  waiting-test-names)
	;; (sleep 10) ;; no point in rushing things at this stage?
	(loop (hash-table-keys *waiting-queue*)))))))































































































































































































































(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))
	(count   (if (null? params) 1 (car params))))
    (conc "/" (string-intersperse 
	       (take dparts (- (length dparts) count))
	       "/"))))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
				((cadr testdat))
				(hash-table-delete! *waiting-queue* testname)))
			  (if (not db)
			      (sqlite3:finalize! ldb)))))
		  waiting-test-names)
	;; (sleep 10) ;; no point in rushing things at this stage?
	(loop (hash-table-keys *waiting-queue*)))))))

;;======================================================================
;; New methodology. These routines will replace the above in time. For
;; now the code is duplicated. This stuff is initially used in the monitor
;; based code.
;;======================================================================

;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests.
;; keyvals
(define (runs:run-tests db target runname testpatts itempatts flags)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys:target->keyval keys target))
	 (run-id      (register-run db keys))  ;;  test-name)))
	 (deferred    '()) ;; delay running these since they have a waiton clause
	 (keepgoing   (hash-table-ref/default flags "-keepgoing")))
    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (and (eq? *passnum* 0)
	     keepgoing)
	(begin
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  (db:delete-tests-in-state db run-id "NOT_STARTED")
	  (db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))
    (set! *passnum* (+ *passnum* 1))
    (let loop ((numtimes 0))
      (for-each 
       (lambda (test-name)
	 (if (runs:can-run-more-tests db)
	     (run-one-test db run-id test-name keyvallst)
	     ;; add some delay 
	     ;(sleep 2)
	     ))
       (tests:sort-by-priority-and-waiton test-names))
      ;; (run-waiting-tests db)
      (if keepgoing
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (and (> estrem 0)
		     (eq? *globalexitstatus* 0))
		(begin
		  (debug:print 1 "Keep going, estimated " estrem " tests remaining to run, will continue in 3 seconds ...")
		  (sleep 3)
		  (run-waiting-tests db)
		  (loop (+ numtimes 1)))))))))

(define (run-one-test db run-id runname test-name keyvallst item-patts flags)
  (debug:print 1 "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   runname)
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
  (change-directory *toppath*)
  (let* ((test-path    (conc *toppath* "/tests/" test-name)) ;; could use test:get-testconfig here ...
	 (test-configf (conc test-path "/testconfig"))
	 (testexists   (and (file-exists? test-configf)(file-read-access? test-configf)))
	 (test-conf    (if testexists (read-config test-configf #f #t) (make-hash-table)))
	 (waiton       (let ((w (config-lookup test-conf "requirements" "waiton")))
			 (if (string? w)(string-split w)'())))
	 (force        (hash-table-ref/default flags "-force"))
	 (rerun        (hash-table-ref/default flags "-rerun"))
	 ;; Are these tags still used? I don't think so...
	 ;;(tags         (let ((t (config-lookup test-conf "setup" "tags")))
	 ;;       	 ;; we want our tags to be separated by commas and fully delimited by commas
	 ;;       	 ;; so that queries with "like" can tie to the commas at either end of each tag
	 ;;       	 ;; while also allowing the end user to freely use spaces and commas to separate tags
	 ;;       	 (if (string? t)(string-substitute (regexp "[,\\s]+") "," (conc "," t ",") #t)
	 ;;       	     '()))))
	 )
    (if (not testexists)
	;; if the test is ill defined spit out an error but keep going (different from how done previously
	(debug:print 0 "ERROR: Can't find config file " test-configf)
	;; put top vars into convenient variables and open the db
	(let* (;; db is always at *toppath*/db/megatest.db
	       (items       (hash-table-ref/default test-conf "items" '()))
	       (itemstable  (hash-table-ref/default test-conf "itemstable" '()))
	       (allitems    (if (or (not (null? items))(not (null? itemstable)))
				(append (item-assoc->item-list items)
					(item-table->item-list itemstable))
				'(()))) ;; a list with one null list is a test with no items
	       (runconfigf  (conc  *toppath* "/runconfigs.config")))
	  (debug:print 1 "items: ")
	  (if (>= *verbosity* 1)(pp allitems))
	  (if (>= *verbosity* 5)
	      (begin
		(print "items: ")(pp (item-assoc->item-list items))
		(print "itemstable: ")(pp (item-table->item-list itemstable))))

	  ;; Comments are loaded by the test run, not at launch time (in general)
	  ;;(if (args:get-arg "-m")
	  ;;    (db:set-comment-for-run db run-id (args:get-arg "-m")))

	  ;; Here is where the test_meta table is best updated
	  (runs:update-test_meta db test-name test-conf)

	  ;; braindead work-around for poorly specified allitems list BUG!!! FIXME
	  (if (null? allitems)(set! allitems '(())))
	  (let loop ((itemdat (car allitems))
		     (tal     (cdr allitems)))
	    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
	    ;; Handle lists of items
	    (let* ((item-path     (item-list->path itemdat)) ;; (string-intersperse (map cadr itemdat) "/"))
		   (new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
		   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
		   (testdat   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs"))
		   (parent-test (and (not (null? items))(equal? item-path "")))
		   (single-test (and (null? items) (equal? item-path "")))
		   (item-test   (not (equal? item-path "")))
		   ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is %
		   (item-matches (if item-patts
				     (let ((res #f))
				       (for-each 
					(lambda (patt)
					  (if (string-match (glob->regexp
							     (string-translate patt "%" "*"))
							    item-path)
					      (set! res #t)))
					(string-split item-patts ",")))
				    #t)))
	      (debug:print 3 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (and item-matches (runs:can-run-more-tests db))
		  (begin
		    (let loop2 ((ts (db:get-test-info db run-id test-name item-path)) ;; #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))
			  (begin
			    (register-test db run-id test-name item-path)
			    ;; Why did I set the comment here?!? POSSIBLE BUG BUT I'M REMOVING IT FOR NOW 10/23/2011
			    ;; (db:test-set-comment db run-id test-name item-path "")
			    (loop2 (db:get-test-info db run-id test-name item-path)
				   (+ ct 1)))
			  (if ts
			      (set! testdat ts)
			      (begin
				(debug:print 0 "WARNING: Couldn't register test " test-name " with item path " item-path ", skipping")
				(if (not (null? tal))
				    (loop (car tal)(cdr tal)))))))
		    (change-directory test-path)
		    ;; this block is here only to inform the user early on
		    (if (file-exists? runconfigf)
			(setup-env-defaults db runconfigf run-id *already-seen-runconfig-info*)
			(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
		    (debug:print 4 "run-id: " run-id " test-name: " test-name " item-path: " item-path " testdat: " (test:get-status testdat) " test-state: " (test:get-state testdat))
		    (case (if force-run ;; (args:get-arg "-force")
			      'NOT_STARTED
			      (if testdat
				  (string->symbol (test:get-state testdat))
				  'failed-to-insert))
		      ((failed-to-insert)
		       (debug:print 0 "ERROR: Failed to insert the record into the db"))
		      ((NOT_STARTED COMPLETED)
		       (debug:print 6 "Got here, " (test:get-state testdat))
		       (let ((runflag #f))
			 (cond
			  ;; i.e. this is the parent test to a suite of items, never "run" it
			  (parent-test
			   (set! runflag #f))
			  ;; -force, run no matter what
			  (force (set! runflag #t))
			  ;; NOT_STARTED, run no matter what
			  ((equal? (test:get-state testdat) "NOT_STARTED")(set! runflag #t))
			  ;; not -rerun and PASS, WARN or CHECK, do no run
			  ((and (or (not rerun)
				    keepgoing)
				(member (test:get-status testdat) '("PASS" "WARN" "CHECK")))
			   (set! runflag #f))
			  ;; -rerun and status is one of the specifed, run it
			  ((and rerun
				(let ((rerunlst (string-split rerun ","))) ;; FAIL,
				  (member (test:get-status testdat) rerunlst)))
			   (set! runflag #t))
			  ;; -keepgoing, do not rerun FAIL
			  ((and keepgoing
				(member (test:get-status testdat) '("FAIL")))
			   (set! runflag #f))
			  ((and (not rerun)
				(member (test:get-status testdat) '("FAIL" "n/a")))
			   (set! runflag #t))
			  (else (set! runflag #f)))
			 (debug:print 6 "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
			 (if (not runflag)
			     (if (not parent-test)
				 (debug:print 1 "NOTE: Not starting test " new-test-name " as it is state \"COMPLETED\" and status \"" (test:get-status testdat) "\", use -force to override"))
			     (let* ((get-prereqs-cmd (lambda ()
						       (db-get-prereqs-not-met db run-id waiton))) ;; check before running ....
				    (launch-cmd      (lambda ()
						       (launch-test db run-id test-conf keyvallst test-name test-path itemdat)))
				    (testrundat      (list get-prereqs-cmd launch-cmd)))
			       (if (or force
				       (let ((preqs-not-yet-met ((car testrundat))))
					 (debug:print 2 "Preqrequesites for " test-name ": " preqs-not-yet-met)
					 (null? preqs-not-yet-met))) ;; are there any tests that must be run before this one...
				   (if (not ((cadr testrundat))) ;; this is the line that launches the test to the remote host
				       (begin
					 (print "ERROR: Failed to launch the test. Exiting as soon as possible")
					 (set! *globalexitstatus* 1) ;; 
					 (process-signal (current-process-id) signal/kill)
					 ;(exit 1)
					 ))
				   (if (not keepgoing)
				       (hash-table-set! *waiting-queue* new-test-name testrundat)))))))
		      ((KILLED) 
		       (debug:print 1 "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
		      ((LAUNCHED REMOTEHOSTSTART RUNNING)  
		       (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
						     (db:test-get-run_duration testdat)))
			      100) ;; i.e. no update for more than 100 seconds
			   (begin
			     (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
			     (test-set-status! db run-id test-name "INCOMPLETE" "STUCK/DEAD" itemdat "Test is stuck or dead" #f))
			   (debug:print 2 "NOTE: " test-name " is already running")))
		      (else       (debug:print 0 "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat))))))
	      (if (not (null? tal))
		  (loop (car tal)(cdr tal)))))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))
	(count   (if (null? params) 1 (car params))))
    (conc "/" (string-intersperse 
	       (take dparts (- (length dparts) count))
	       "/"))))

Modified tasks.scm from [1354231a43] to [b3487ed7af].

225
226
227
228
229
230
231




232

(define (tasks:remove-monitor-record db)
  (sqlite3:execute db "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   (current-process-id)
		   (get-host-name)))

(define (tasks:start-run db task)




  (print "Starting run " task))







>
>
>
>

225
226
227
228
229
230
231
232
233
234
235
236

(define (tasks:remove-monitor-record db)
  (sqlite3:execute db "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   (current-process-id)
		   (get-host-name)))

(define (tasks:start-run db task)
  ;; Starting run #(3 run matt reset ubuntu/afs/tmp ww44 % % 1319368208.0 1319386680.0)
  ;; Starting run #(5 run matt reset centos/nfs/nada ww42 all all 1319371306.0 1319386801.0)


  (print "Starting run " task))