Megatest

Changes On Branch 8e1432907b50cfed
Login

Changes In Branch v1.60_ezsteps_tcsh_fix Through [8e1432907b] Excluding Merge-Ins

This is equivalent to a diff from fb5d8213b7 to 8e1432907b

2015-05-21
01:28
cherrypicked a064 check-in: 50cc57039a user: matt tags: v1.60
2015-05-20
23:38
put more output in mt_launch.log check-in: 157b6d1b21 user: matt tags: v1.60_ezsteps_tcsh_fix
11:54
Fixed typo and improved example check-in: 8e1432907b user: mrwellan tags: v1.60_ezsteps_tcsh_fix
2015-05-16
21:23
Yes, I do want the db dir in mtrah check-in: 0d58c56f22 user: matt tags: v1.60_ezsteps_tcsh_fix
2015-05-11
17:06
Fixed ezstep command execution where system default shell is tcsh check-in: 9b7efa961f user: mrwellan tags: v1.60_ezsteps_tcsh_fix
05:13
Added generic pool of cpus to batchsim check-in: fb5d8213b7 user: matt tags: v1.60
2015-05-07
18:09
Capture missing fix check-in: a9aad76712 user: mrwellan tags: v1.60

Modified db.scm from [bd9d70e94b] to [272f710720].

2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       0)))))))
             ;; DEBUG FIXME - need to merge this v.155 query correctly   
             ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?)
             ;; AND NOT (uname = 'n/a' AND item_path = '');"

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining dbstruct run-id)







|







2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       )))))))
             ;; DEBUG FIXME - need to merge this v.155 query correctly   
             ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?)
             ;; AND NOT (uname = 'n/a' AND item_path = '');"

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining dbstruct run-id)

Modified launch.scm from [0044077627] to [6b1778f659].

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda ()
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid (process-run cmd)))
	 (rmt:test-set-top-process-pid run-id test-id pid)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (vector-set! exit-info 0 pid)
		       (vector-set! exit-info 1 exit-status)
		       (vector-set! exit-info 2 exit-code)







|
|
|







79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc "exec " stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid (process-run "/bin/bash" (list "-c" cmd))))
	 (rmt:test-set-top-process-pid run-id test-id pid)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (vector-set! exit-info 0 pid)
		       (vector-set! exit-info 1 exit-status)
		       (vector-set! exit-info 2 exit-code)
198
199
200
201
202
203
204
205

206
207


208
209
210
211
212
213
214
215
216
                                              runscript))))) ;; assume it is on the path
	       ;; (rollup-status 0)
	       )
	  (change-directory top-path)

	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,

	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;


	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
	    (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(begin
		  (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
		  (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys))







|
>


>
>

|







198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
                                              runscript))))) ;; assume it is on the path
	       ;; (rollup-status 0)
	       )
	  (change-directory top-path)

	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; WAS: Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; NOW: Do not run test test unless state is LAUNCHED
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  ;; This is flawed. It should be a single transaction that tests for NOT_STARTED and updates to REMOTEHOSTSTART
	  ;;
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
	    (if (equal? (db:test-get-state test-info) "LAUNCHED") ;; '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(begin
		  (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
		  (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys))
880
881
882
883
884
885
886



887





888
889
890
891
892
893
894
				      (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
				      (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
				      (list 'runname   runname)
				      (list 'mt-bindir-path mt-bindir-path))))))))

    ;; clean out step records from previous run if they exist
    ;; (rmt:delete-test-step-records run-id test-id)



    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir





    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))







>
>
>
|
>
>
>
>
>







883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
				      (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
				      (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
				      (list 'runname   runname)
				      (list 'mt-bindir-path mt-bindir-path))))))))

    ;; clean out step records from previous run if they exist
    ;; (rmt:delete-test-step-records run-id test-id)
    
    ;; Moving launch logs to MT_RUN_AREA_HOME/logs 
    ;;
    (let ((launchdir (configf:lookup *configdat* "setup" "launchdir"))) ;; (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
      (if (not launchdir) ;; default
	  (change-directory (conc *toppath* "/logs")) ;; can assume this exists
	  (case (string->symbol launchdir)
	    ((legacy)(change-directory work-area))
	    (else    (change-directory launchdir)))))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
	   (launch-results (apply (if launchwait
				      cmd-run-with-stderr->list
				      process-run)
				  (if useshell
				      (let ((cmdstr (string-intersperse fullcmd " ")))
					(if launchwait
					    cmdstr
					    (conc cmdstr " >> mt_launch.log 2>&1")))
				      (car fullcmd))
				  (if useshell
				      '()
				      (cdr fullcmd)))))
      (if (not launchwait) ;; give the OS a little time to allow the process to start
	  (thread-sleep! 0.01))
      (with-output-to-file "mt_launch.log"
	(lambda ()
	  (if (list? launch-results)
	      (apply print launch-results)
	      (print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	  #:append))
      (debug:print 2 "Launching completed, updating db")
      (debug:print 2 "Launch results: " launch-results)







|






|







929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
	   (launch-results (apply (if launchwait
				      cmd-run-with-stderr->list
				      process-run)
				  (if useshell
				      (let ((cmdstr (string-intersperse fullcmd " ")))
					(if launchwait
					    cmdstr
					    (conc cmdstr " >> " work-area "/mt_launch.log 2>&1")))
				      (car fullcmd))
				  (if useshell
				      '()
				      (cdr fullcmd)))))
      (if (not launchwait) ;; give the OS a little time to allow the process to start
	  (thread-sleep! 0.01))
      (with-output-to-file (conc work-area "/mt_launch.log")
	(lambda ()
	  (if (list? launch-results)
	      (apply print launch-results)
	      (print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	  #:append))
      (debug:print 2 "Launching completed, updating db")
      (debug:print 2 "Launch results: " launch-results)

Modified runs.scm from [e4e58cdff9] to [a7483fb45c].

849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
				  reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      (mt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)







|







849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
				  reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      (rmt:roll-up-pass-fail-counts run-id test-name item-path "FAIL") ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)

Modified tests/fullrun/megatest.config from [e5113ba78d] to [5c36f01faf].

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39



40
41
42


43
44
45
46
47
48
49
[setup]

# turn off faststart, put monitor.db in MT_RUN_AREA_HOME/db
# and set the dbdir to /var/tmp/$USER/mt_db to enable keeping
# the raw db in /var/tmp/$USER
#
faststart  no
monitordir #{getenv MT_RUN_AREA_HOME}/db
dbdir      /var/tmp/#{getenv USER}/mt_db

# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1




# wait for runs to completely complete. yes, anything else is no
run-wait yes



# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s







|







>
>
>



>
>







25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
[setup]

# turn off faststart, put monitor.db in MT_RUN_AREA_HOME/db
# and set the dbdir to /var/tmp/$USER/mt_db to enable keeping
# the raw db in /var/tmp/$USER
#
faststart  no
monitordir #{scheme (conc *toppath*)}/db
dbdir      /var/tmp/#{getenv USER}/mt_db

# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1

# set the dbdir, default is linktree
dbdir #{getenv MT_RUN_AREA_HOME}/db/

# wait for runs to completely complete. yes, anything else is no
run-wait yes



# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s

Modified tests/simplerun/tests/test1/step2.sh from [97ecbea6c6] to [b3e19b3724].

1
2
3
4
5
6
#!/usr/bin/env bash

# Run your step here
echo Got here eh!






<

1
2
3
4

5
#!/usr/bin/env bash

# Run your step here
echo Got here eh!