Megatest

Changes On Branch 6f0c4b417ae5c766
Login

Changes In Branch experimental Through [6f0c4b417a] Excluding Merge-Ins

This is equivalent to a diff from 4d44659fb3 to 6f0c4b417a

2011-06-26
23:38
Merged refactor of dashboard to trunk check-in: d73b2c1642 user: mrwellan tags: trunk
2011-06-18
15:41
Fixed the issue where runs were blocked by itemized tests where the parent state and status confused megatest check-in: ca022fd7b5 user: mrwellan tags: experimental
13:20
Added PATH and DISPLAY to nbfake check-in: 6f0c4b417a user: mrwellan tags: experimental
2011-06-16
14:01
Added switch to set number of rows in a dashboard screen check-in: 4d44659fb3 user: mrwellan tags: trunk
10:15
Fixed annoying 'remove steps on re-run' bug check-in: 51810ab5ab user: mrwellan tags: trunk

Modified common.scm from [64281b3aad] to [8055bf6de1].

19
20
21
22
23
24
25

26
27
28
29
30
31


32
33
34
35
36
37
38
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41







+






+
+







(include "margs.scm")

;; Short alias for the environment variable lookup procedure.
(define getenv get-environment-variable)

;; Values of $HOME and $USER, read once when this file is loaded.
(define home (getenv "HOME"))
(define user (getenv "USER"))

;; global mutable state
(define *configinfo* #f)
(define *configdat*  #f) ;; passed to config-lookup when reading settings such as max_concurrent_jobs
(define *toppath*    #f) ;; directory changed into before a test is launched; tests live under <toppath>/tests/
(define *already-seen-runconfig-info* #f)
(define *waiting-queue* (make-hash-table)) ;; test name -> (list prereq-check-thunk launch-thunk) for tests not launched yet
(define *globalexitstatus* 0) ;; attempt to work around possible thread issues
(define *passnum*     0) ;; number of calls made to run-tests so far; incremented on every call


;; Look up the command line argument named by `val` with args:get-arg and
;; return its value, or `default` when the lookup yields #f.
(define-inline (get-with-default val default)
  (or (args:get-arg val) default))

(define-inline (assoc/default key lst . default)
  (let ((res (assoc key lst)))

Modified db.scm from [b22d01852d] to [11e94b6a23].

210
211
212
213
214
215
216











217
218
219
220
221
222
223
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234







+
+
+
+
+
+
+
+
+
+
+







;; Delete all test_steps rows that belong to the test selected by run-id,
;; test-name and the item path computed from itemdat.
(define (db:delete-test-step-records db run-id test-name itemdat)
  (let ((item-path (item-list->path itemdat))
        (delete-sql "DELETE FROM test_steps WHERE test_id in (SELECT id FROM tests WHERE run_id=? AND testname=? AND item_path=?);"))
    (sqlite3:execute db delete-sql run-id test-name item-path)))
;; 
;; Remove a test and its steps: first the test_steps rows that reference
;; test-id, then the tests row itself.
(define (db:delete-test-records db test-id)
  (let ((drop-steps-sql "DELETE FROM test_steps WHERE test_id=?;")
        (drop-test-sql  "DELETE FROM tests WHERE id=?;"))
    (sqlite3:execute db drop-steps-sql test-id)
    (sqlite3:execute db drop-test-sql test-id)))

;; Set the tests of run `run-id` whose testname is in the list `tests` to
;; state `newstate` and status `newstatus`.
;;
;; currstate / currstatus restrict the update to tests currently in that
;; state / status; use currstate = #f and or currstatus = #f to apply to any
;; state or status respectively.
;;
;; Every value is bound as an SQL parameter instead of being spliced into
;; the statement text, so quotes in a test name, state or status cannot
;; change the query. The update is limited to rows with run_id = run-id;
;; without that filter, tests with the same names in other runs would be
;; modified as well. An empty `tests` list is a no-op.
;;
;; NOTE(review): one placeholder is generated per test name, so a very long
;; list could exceed SQLite's bound-parameter limit -- confirm expected sizes.
(define (db:set-tests-state-status db run-id tests currstate currstatus newstate newstatus)
  (if (not (null? tests))
      (let ((placeholders (string-intersperse (map (lambda (ignored) "?") tests) ","))
            ;; parameter order must follow the order of the ?'s in the statement
            (params       (append (list newstate newstatus run-id)
                                  (if currstate  (list currstate)  '())
                                  (if currstatus (list currstatus) '())
                                  tests)))
        (apply sqlite3:execute
               db
               (conc "UPDATE tests SET state=?,status=? WHERE run_id=? AND "
                     (if currstate  "state=? AND "  "")
                     (if currstatus "status=? AND " "")
                     "testname IN (" placeholders ");")
               params))))

(define (db:get-count-tests-running db)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     db

Modified megatest.scm from [38b46f671d] to [641a234d19].

43
44
45
46
47
48
49
50


51
52
53
54

55
56
57
58
59
60
61
43
44
45
46
47
48
49

50
51
52

53
54
55
56
57
58
59
60
61
62







-
+
+

-


+







  -testpatt patt          : in list-runs show only these tests, % is the wildcard
  -itempatt patt          : in list-runs show only tests with items that match patt
  -showkeys               : show the keys used in this megatest setup

Misc 
  -force                  : override some checks
  -xterm                  : start an xterm instead of launching the test
  -remove-runs            : remove the data for a run, requires fields, :runname 
  -remove-runs            : remove the data for a run, requires all fields be specified
                            and :runname ,-testpatt and -itempatt
                            and -testpatt
  -testpatt patt          : remove tests matching patt (requires -remove-runs)
  -keepgoing              : continue running until no jobs are \"LAUNCHED\" or
                            \"NOT_STARTED\"
  -rerun FAIL,WARN...     : re-run if called on a test that previously ran

Helpers
  -runstep stepname  ...  : take remaining params as comand and execute as stepname
                            log will be in stepname.log. Best to put command in quotes
  -logpro file            : with -exec apply logpro file to stepname.log, creates
                            stepname.html and sets log to same
                            If using make use stepname_logpro.log as your target
81
82
83
84
85
86
87

88
89
90
91
92
93
94
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96







+







			"-testpatt" 
			"-itempatt"
			"-setlog"
			"-set-toplog"
			"-runstep"
			"-logpro"
			"-m"
			"-rerun"
			) 
		 (list  "-h"
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-test-status"
		        "-gui"
115
116
117
118
119
120
121


122
123
124
125
126
127
128
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132







+
+








;; Global flag, initially #f.
;; NOTE(review): presumably set to #t once a command line action has been
;; carried out -- confirm against the rest of megatest.scm.
(define *didsomething* #f)

;;======================================================================
;; Remove old run(s)
;;======================================================================

;; since several actions can be specified on the command line the removal
;; is done first
(define (remove-runs)
  (cond
   ((not (args:get-arg ":runname"))
    (print "ERROR: Missing required parameter for -remove-runs, you must specify the run name pattern with :runname patt")
    (exit 2))
   ((not (args:get-arg "-testpatt"))
    (print "ERROR: Missing required parameter for -remove-runs, you must specify the test pattern with -testpatt")
480
481
482
483
484
485
486

487

488
489
490
491
492
493
494
484
485
486
487
488
489
490
491

492
493
494
495
496
497
498
499







+
-
+







	      (let* ((testinfo (db:get-test-info db run-id test-name (item-list->path itemdat))))
		(if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		    (begin
		      (print "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result")
		      (test-set-status! db run-id test-name
					(if kill-job? "KILLED" "COMPLETED")
					(if (vector-ref exit-info 1) ;; look at the exit-status
					    (if (and (not kill-job?) 
					    (if (eq? (vector-ref exit-info 2) 0)
						     (eq? (vector-ref exit-info 2) 0))
						"PASS"
						"FAIL")
					    "FAIL") itemdat (args:get-arg "-m")))))
	      (mutex-unlock! m)
	      ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	      ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
	      (print "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 

Modified runs.scm from [60a12faf67] to [b5df931833].

248
249
250
251
252
253
254






255
256
257
258
259
260

261
262
263
264
265
266
267
268
269
270
271
272
273
274

275
276
277
278

279
280
281
282
283
284
285
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265

266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293







+
+
+
+
+
+





-
+














+




+







	      tests)
    res))

;; Run every test named in test-names for the current run.
;;
;;   db         - database handle
;;   test-names - list of test name strings
;;
;; The run is registered (register-run) from the keys returned by
;; db-get-keys. Each test is then handed to run-one-test unless the number
;; of running tests has reached the "max_concurrent_jobs" setting in the
;; "setup" section of *configdat*. With -keepgoing the whole list is
;; revisited every 10 seconds while db:estimated-tests-remaining reports
;; more than zero tests left.
(define (run-tests db test-names)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys->vallist keys #t))
	 (run-id      (register-run db keys))) ;;  test-name)))
    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    ;; (arguments: any current state, current status "FAIL" -> new state
    ;; "NOT_STARTED", new status "FAIL")
    (if (and (eq? *passnum* 0)
	     (args:get-arg "-keepgoing"))
	(db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))
    (set! *passnum* (+ *passnum* 1))
    (let loop ((numtimes 0))
      (for-each 
       (lambda (test-name)
	 (let ((num-running (db:get-count-tests-running db))
	       (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	   ;; NOTE(review): in this diff rendering the active print below is the
	   ;; removed line and the commented-out copy after it is its replacement.
	   (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	   ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	   ;; Launch when no limit is configured, or when the configured value
	   ;; parses as a number and fewer tests than that are running. A value
	   ;; that does not parse as a number ends up in the WARNING branch.
	   (if (or (not max-concurrent-jobs)
		   (and max-concurrent-jobs
			(string->number max-concurrent-jobs)
			(not (>= num-running (string->number max-concurrent-jobs)))))
	       (run-one-test db run-id test-name keyvallst)
	       (print "WARNING: Max running jobs exceeded, current number running: " num-running 
		      ", max_concurrent_jobs: \"" max-concurrent-jobs "\""))))
       test-names)
      ;; With -keepgoing, poll until the estimate of remaining tests drops to zero.
      (if (args:get-arg "-keepgoing")
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (> estrem 0)
		(begin
		  (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...")
		  (sleep 10)
		  ;; (run-waiting-tests db)
		  (loop (+ numtimes 1)))))))))
	   
;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
(define (run-one-test db run-id test-name keyvallst)
  (run-waiting-tests db)
  (print "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
  (change-directory *toppath*)
  (let* ((test-path    (conc *toppath* "/tests/" test-name))
306
307
308
309
310
311
312
313

314
315
316

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340

341
342
343
344
345
346
347
348
349
350

351
352
353
354


355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373






































374
375
376
377
378


379
380
381

382
383
384

385
386
387
388
389
390
391
314
315
316
317
318
319
320

321
322
323

324
325
326
327
328
329
330
331
332
333
334
335
336
337
338






339
340
341

342
343
344
345
346
347
348
349
350
351

352
353
354


355
356
357
358
359
















360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400


401
402
403
404

405
406
407

408
409
410
411
412
413
414
415







-
+


-
+














-
-
-
-
-
-



-
+









-
+


-
-
+
+



-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



-
-
+
+


-
+


-
+







	      (db:set-comment-for-run db run-id (args:get-arg "-m")))
	  (let loop ((itemdat (car allitems))
		     (tal     (cdr allitems)))
	    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
	    (let* ((item-path     (item-list->path itemdat)) ;; (string-intersperse (map cadr itemdat) "/"))
		   (new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
		   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
		   (test-status   #f)
		   (testdat   #f)
		   (num-running (db:get-count-tests-running db))
		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	      (if (not (or (not max-concurrent-jobs)
			   (and max-concurrent-jobs
				(string->number max-concurrent-jobs)
				(not (>= num-running (string->number max-concurrent-jobs))))))
		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
			 ", max_concurrent_jobs: " max-concurrent-jobs)
		  (begin
		    (let loop2 ((ts (db:get-test-info db run-id test-name item-path)) ;; #f)
				(ct 0))
		      (if (and (not ts)
			       (< ct 10))
			  (begin
			    (register-test db run-id test-name item-path)
			    (db:test-set-comment db run-id test-name item-path "")
			    ;; (test-set-status! db run-id test-name "NOT_STARTED" "n/a" itemdat "")
			    ;; (db:set-comment-for-test db run-id test-name item-path "")

			    ;; Move the next line into the test exectute code
			    ;; (db:delete-test-step-records db run-id test-name) ;; clean out if this is a re-run

			    (loop2 (db:get-test-info db run-id test-name item-path)
				   (+ ct 1)))
			  (if ts
			      (set! test-status ts)
			      (set! testdat ts)
			      (begin
				(print "WARNING: Couldn't register test " test-name " with item path " item-path ", skipping")
				(if (not (null? tal))
				    (loop (car tal)(cdr tal)))))))
		    (change-directory test-path)
		    ;; this block is here only to inform the user early on
		    (if (file-exists? runconfigf)
			(setup-env-defaults db runconfigf run-id *already-seen-runconfig-info*)
			(print "WARNING: You do not have a run config file: " runconfigf))
		    ;; (print "run-id: " run-id " test-name: " test-name " item-path: " item-path " test-status: " (test:get-status test-status) " test-state: " (test:get-state test-status))
		    ;; (print "run-id: " run-id " test-name: " test-name " item-path: " item-path " testdat: " (test:get-status testdat) " test-state: " (test:get-state testdat))
		    (case (if (args:get-arg "-force")
			      'NOT_STARTED
			      (if test-status
				  (string->symbol (test:get-state test-status))
			      (if testdat
				  (string->symbol (test:get-state testdat))
				  'failed-to-insert))
		      ((failed-to-insert)
		       (print "ERROR: Failed to insert the record into the db"))
		      ((NOT_STARTED COMPLETED) ;; (cadr status is the row id for the run record)
		       (if (and (equal? (test:get-state test-status)  "COMPLETED")
				(or (equal? (test:get-status test-status) "PASS")
				    (equal? (test:get-status test-status) "WARN")
				    (equal? (test:get-status test-status) "CHECK"))
				(not (args:get-arg "-force")))
			   (print "NOTE: Not starting test " new-test-name " as it is state \"COMPLETED\" and status \"" (test:get-status test-status) "\", use -force to override")
			   (let* ((get-prereqs-cmd (lambda ()
						     (db-get-prereqs-not-met db run-id waiton))) ;; check before running ....
				  (launch-cmd      (lambda ()
						     (launch-test db run-id test-conf keyvallst test-name test-path itemdat)))
				  (testrundat      (list get-prereqs-cmd launch-cmd)))
			     (if (or (args:get-arg "-force")
				     (null? ((car testrundat)))) ;; are there any tests that must be run before this one...
				 ((cadr testrundat)) ;; this is the line that launches the test to the remote host
				 (hash-table-set! *waiting-queue* new-test-name testrundat)))))
		      ((NOT_STARTED COMPLETED)
		       (print "Got here, " (test:get-state testdat))
		       (let ((runflag #f))
			 (cond
			  ;; -force, run no matter what
			  ((args:get-arg "-force")(set! runflag #t))
			  ;; NOT_STARTED, run no matter what
			  ((equal? (test:get-state testdat) "NOT_STARTED")(set! runflag #t))
			  ;; not -rerun and PASS, WARN or CHECK, do no run
			  ((and (or (not (args:get-arg "-rerun"))
				    (args:get-arg "-keepgoing"))
				(member (test:get-status testdat) '("PASS" "WARN" "CHECK")))
			   (set! runflag #f))
			  ;; -rerun and status is one of the specifed, run it
			  ((and (args:get-arg "-rerun")
				(let ((rerunlst (string-split (args:get-arg "-rerun") ","))) ;; FAIL,
				  (member (test:get-status testdat) rerunlst)))
			   (set! runflag #t))
			  ;; -keepgoing, do not rerun FAIL
			  ((and (args:get-arg "-keepgoing")
				(member (test:get-status testdat) '("FAIL")))
			   (set! runflag #f))
			  ((and (not (args:get-arg "-rerun"))
				(member (test:get-status testdat) '("FAIL" "n/a")))
			   (set! runflag #t))
			  (else (set! runflag #f)))
			 ;; (print "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
			 (if (not runflag)
			     (print "NOTE: Not starting test " new-test-name " as it is state \"COMPLETED\" and status \"" (test:get-status testdat) "\", use -force to override")
			     (let* ((get-prereqs-cmd (lambda ()
						       (db-get-prereqs-not-met db run-id waiton))) ;; check before running ....
				    (launch-cmd      (lambda ()
						       (launch-test db run-id test-conf keyvallst test-name test-path itemdat)))
				    (testrundat      (list get-prereqs-cmd launch-cmd)))
			       (if (or (args:get-arg "-force")
				       (null? ((car testrundat)))) ;; are there any tests that must be run before this one...
				   ((cadr testrundat)) ;; this is the line that launches the test to the remote host
				   (hash-table-set! *waiting-queue* new-test-name testrundat))))))
		      ((KILLED) 
		       (print "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
		      ((LAUNCHED REMOTEHOSTSTART RUNNING)  
		       (if (> (- (current-seconds)(+ (db:test-get-event_time test-status)
						     (db:test-get-run_duration test-status)))
		       (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
						     (db:test-get-run_duration testdat)))
			      100) ;; i.e. no update for more than 100 seconds
			   (begin
			     (print "WARNING: Test " test-name " appears to be dead.")
			     (print "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
			     (test-set-status! db run-id test-name "INCOMPLETE" "STUCK/DEAD" itemdat "Test is stuck or dead"))
			   (print "NOTE: " test-name " is already running")))
		      (else       (print "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state test-status))))))
		      (else       (print "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat))))))
	      (if (not (null? tal))
		  (loop (car tal)(cdr tal)))))))))

(define (run-waiting-tests db)
  (let ((numtries           0)
	(last-try-time      (current-seconds))
	(times              (list 1))) ;; minutes to wait before trying again to kick off runs

Modified utils/nbfake from [033159a590] to [b1e649d799].

1
2
3
4
5
6
7
8
9
10
11
12

1
2
3
4
5
6
7
8
9
10
11

12











-
+
#!/bin/bash

# nbfake: start the command given as arguments ($*) in the background on
# $TARGETHOST via ssh, from the current working directory, detached with
# nohup. stdout and stderr of the command go to a file named
# NBFAKE-<timestamp> in that directory.

# ssh localhost "nohup $* > nbfake.log 2> nbfake.err < /dev/null"

# Default to the local machine when TARGETHOST is unset or empty.
if [[ $TARGETHOST == ""  ]]; then
  TARGETHOST=localhost
fi

# Can't always trust $PWD
CURRWD=`pwd`

# $CURRWD, $PATH, $DISPLAY, $* and the `date` substitution sit inside a
# double-quoted string, so they are expanded by this local shell before ssh
# runs; the remote side receives their current local values.
# The timestamp format is %G (ISO year), literal "WW", %V (ISO week),
# %u (day of week) and %T (time).
# NOTE(review): in this diff rendering the first ssh line is the removed
# version and the second, which also exports DISPLAY, is its replacement.
ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\""
ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\""