Megatest

Changes On Branch cc6c07fcb36686d2
Login

Changes In Branch launcher-exit-handling-refactor Through [cc6c07fcb3] Excluding Merge-Ins

This is equivalent to a diff from b6900f572f to cc6c07fcb3

2015-11-11
20:47
All states supported now check-in: ab348d3b46 user: matt tags: v1.60
20:47
Fix typo Closed-Leaf check-in: 56036da5c4 user: matt tags: launcher-exit-handling-refactor
19:18
refactor launcher handling of exit codes check-in: cc6c07fcb3 user: mrwellan tags: launcher-exit-handling-refactor
11:27
Merged logpro-abort-check into v1.60/28 but abort not quite right yet check-in: b6900f572f user: mrwellan tags: v1.60
2015-11-10
22:32
Bumped version check-in: aceba0bc19 user: matt tags: v1.60
21:53
Added partial support for logpro check and abort exit codes Closed-Leaf check-in: d646277c30 user: matt tags: logpro-abort-check

Modified launch.scm from [0439ab0db6] to [254cea039f].

10
11
12
13
14
15
16

17
18
19
20
21
22
23
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24







+








;;======================================================================
;; launch a task - this runs on the originating host, tests themselves
;;
;;======================================================================

(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables)
(use defstruct)

(import (prefix base64 base64:))
(import (prefix sqlite3 sqlite3:))

(declare (unit launch))
(declare (uses common))
(declare (uses configf))
50
51
52
53
54
55
56


57
58
59
60
61
62
63
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66







+
+







;; if handed a string, process it, else look for MT_CMDINFO
(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
  (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO"))))
    (if enccmd
	(common:read-encoded-string enccmd)
	'())))

;;                       0           1              2              3
(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))

(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig)
  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
	 (stepinfo       (cadr ezstep))
	 (stepparts      (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo))
	 (stepparms      (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each 
	 (stepcmd        (list-ref stepparts 3))
98
99
100
101
102
103
104
105
106
107



108
109
110
111
112
113
114

115
116
117
118
119
120

121
122
123



124
125
126
127
128
129

130
131

132
133
134
135
136
137

138
139
140
141
142





143
144
145
146



147
148
149
150
151

152
153
154
155
156
157
158
159

160
161

162
163
164

165
166
167
168
169
170

171
172
173
174
175
176

177
178
179
180
181
182
183
184

185
186
187
188
189
190
191
101
102
103
104
105
106
107



108
109
110
111
112
113
114
115
116

117
118
119
120
121
122
123
124



125
126
127
128
129
130
131
132

133
134

135
136
137
138
139
140

141
142




143
144
145
146
147
148



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164

165
166

167
168
169

170
171
172
173
174
175

176
177
178
179
180
181

182
183
184
185
186
187
188
189

190
191
192
193
194
195
196
197







-
-
-
+
+
+






-
+






+
-
-
-
+
+
+





-
+

-
+





-
+

-
-
-
-
+
+
+
+
+

-
-
-
+
+
+





+







-
+

-
+


-
+





-
+





-
+







-
+







     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid (process-run "/bin/bash" (list "-c" cmd))))
	 (rmt:test-set-top-process-pid run-id test-id pid)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (vector-set! exit-info 0 pid)
		       (vector-set! exit-info 1 exit-status)
		       (vector-set! exit-info 2 exit-code)
		       (launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
		       (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
		       (launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
		       (mutex-unlock! m)
		       (if (eq? pid-val 0)
			   (begin
			     (thread-sleep! 2)
			     (processloop (+ i 1))))
		       )))))
    (debug:print-info 0 "step " stepname " completed with exit code " (vector-ref exit-info 2))
    (debug:print-info 0 "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
    ;; now run logpro if needed
    (if logpro-used
	(let ((pid (process-run (conc "logpro " logpro-file " " (conc stepname ".html") " < " stepname ".log"))))
	  (let processloop ((i 0))
	    (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
			(mutex-lock! m)
			;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code)
			(vector-set! exit-info 0 pid)
			(vector-set! exit-info 1 exit-status)
			(vector-set! exit-info 2 exit-code)
			(launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
			(launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
			(launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
			(mutex-unlock! m)
			(if (eq? pid-val 0)
			    (begin
			      (thread-sleep! 2)
			      (processloop (+ i 1)))))
	    (debug:print-info 0 "logpro for step " stepname " exited with code " (vector-ref exit-info 2)))))
	    (debug:print-info 0 "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2)))))
    
    (let ((exinfo (vector-ref exit-info 2))
    (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
	  (logfna (if logpro-used (conc stepname ".html") "")))
      (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna))
    (if logpro-used
	(rmt:test-set-log! run-id test-id (conc stepname ".html")))
    ;; set the test final status
    (let* ((process-exit-status (vector-ref exit-info 2))
    (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
	   (this-step-status (cond
			      ((and (eq? process-exit-status 2) logpro-used) 'warn)
			      ((and (eq? process-exit-status 3) logpro-used) 'check)
			      ((and (eq? process-exit-status 4) logpro-used) 'abort)
			      ((eq? (vector-ref exit-info 2) 0)              'pass)
			      ((and (eq? process-exit-status 2) logpro-used) 'warn)  ;; logpro 2 = warnings
			      ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check
			      ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = abort			      
			      ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 4 = abort
			      ((eq? process-exit-status 0)                   'pass)  ;; logpro 0 = pass
			      (else 'fail)))
	   (overall-status   (cond
			      ((eq? (vector-ref exit-info 3) 2) 'warn) ;; rollup-status
			      ((eq? (vector-ref exit-info 3) 0) 'pass)
	   (overall-status   (conds
			      ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3)
			      ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3)
			      (else 'fail)))
	   (next-status      (cond 
			      ((eq? overall-status 'pass) this-step-status)
			      ((eq? overall-status 'warn)
			       (if (eq? this-step-status 'fail) 'fail 'warn))
			      ((eq? overall-status 'abort) 'abort)
			      (else 'fail)))
	   (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
	    (cond
	     ((null? tal) ;; more to run?
	      "COMPLETED")
	     (else "RUNNING")))
	   )
      (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
      (debug:print 4 "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used 
		   " this-step-status: " this-step-status " overall-status: " overall-status 
		   " next-status: " next-status " rollup-status: " (vector-ref exit-info 3))
		   " next-status: " next-status " rollup-status: "  (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3))
      (case next-status
	((warn)
	 (vector-set! exit-info 3 2) ;; rollup-status
	 (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "WARN" 
				 (if (eq? this-step-status 'warn) "Logpro warning found" #f)
				 #f))
	((check)
	 (vector-set! exit-info 3 3) ;; rollup-status
	 (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "CHECK" 
				 (if (eq? this-step-status 'check) "Logpro check found" #f)
				 #f))
	((abort)
	 (vector-set! exit-info 3 4) ;; rollup-status
	 (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 4) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "ABORT" 
				 (if (eq? this-step-status 'abort) "Logpro abort found" #f)
				 #f))
	((pass)
	 (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
	(else ;; 'fail
	 (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
	 (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
	 (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
	 )))
    logpro-used))

(define (launch:execute encoded-cmd)
  
   (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
380
381
382
383
384
385
386
387

388
389
390
391
392
393
394
386
387
388
389
390
391
392

393
394
395
396
397
398
399
400







-
+







	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t 0))
		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
		 (job-thread   #f)
		 (keep-going   #t)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
				 ;;    (run-n-wait fullrunscript)))
				 ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
406
407
408
409
410
411
412
413
414
415
416




417
418
419
420
421
422
423
412
413
414
415
416
417
418




419
420
421
422
423
424
425
426
427
428
429







-
-
-
-
+
+
+
+







				 (if fullrunscript
				     (let ((pid (process-run fullrunscript)))
				       (rmt:test-set-top-process-pid run-id test-id pid)
				       (let loop ((i 0))
					 (let-values
					  (((pid-val exit-status exit-code) (process-wait pid #t)))
					  (mutex-lock! m)
					  (vector-set! exit-info 0 pid)
					  (vector-set! exit-info 1 exit-status)
					  (vector-set! exit-info 2 exit-code)
					  (vector-set! exit-info 3 exit-code)  ;; rollup status
					  (launch:einf-pid-set!           exit-info  pid)         ;; (vector-set! exit-info 0 pid)
					  (launch:einf-exit-status-set!   exit-info  exit-status) ;; (vector-set! exit-info 1 exit-status)
					  (launch:einf-exit-code-set!     exit-info  exit-code)   ;; (vector-set! exit-info 2 exit-code)
					  (launch:einf-rollup-status-set! exit-info  exit-code)   ;; (vector-set! exit-info 3 exit-code)  ;; rollup status
					  (mutex-unlock! m)
					  (if (eq? pid-val 0)
					      (begin
						(thread-sleep! 2)
						(loop (+ i 1)))
					      )))))
				 ;; then, if runscript ran ok (or did not get called)
433
434
435
436
437
438
439
440

441
442

443
444
445
446
447
448
449
439
440
441
442
443
444
445

446
447

448
449
450
451
452
453
454
455







-
+

-
+







				       ;; if ezsteps was defined then we are sure to have at least one step but check anyway
				       (if (not (> (length ezstepslst) 0))
					   (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length")
					   (let loop ((ezstep (car ezstepslst))
						      (tal    (cdr ezstepslst))
						      (prevstep #f))
					     ;; check exit-info (vector-ref exit-info 1)
					     (if (vector-ref exit-info 1)
					     (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
						 (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
						   (if (and (steprun-good? logpro-used (launch:einf-exit-code exit-info))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
						 (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
479
480
481
482
483
484
485
486

487
488
489
490
491
492
493
485
486
487
488
489
490
491

492
493
494
495
496
497
498
499







-
+







				       (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?
					     (let* ((pid1 (vector-ref exit-info 0))
					     (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
						    (pid2 (rmt:test-get-top-process-pid run-id test-id))
						    (pids (delete-duplicates (filter number? (list pid1 pid2)))))
					       (if (not (null? pids))
						   (begin
						     (for-each
						      (lambda (pid)
							(handle-exceptions
543
544
545
546
547
548
549
550
551


552
553
554
555


556
557
558
559

560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576



577
578
579
580
581
582
583
549
550
551
552
553
554
555


556
557
558
559


560
561
562
563
564

565
566
567
568
569
570
571
572
573
574
575
576
577
578
579



580
581
582
583
584
585
586
587
588
589







-
-
+
+


-
-
+
+



-
+














-
-
-
+
+
+







	      ;; Am I completed?
	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
				                                        ;; "COMPLETED"
							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
				    )
			(new-status (cond
				     ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
				     ((eq? (vector-ref exit-info 3) 0)
				     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
				     ((eq? (launch:einf-rollup-status exit-info) 0)     ;; (vector-ref exit-info 3)
				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				     ((eq? (vector-ref exit-info 3) 1) "FAIL")
				     ((eq? (vector-ref exit-info 3) 2)
				     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")  ;; (vector-ref exit-info 3)
				     ((eq? (launch:einf-rollup-status exit-info) 2)	     ;;	(vector-ref exit-info 3)
				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
		    (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " (vector-ref exit-info 3))
		    (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
		    (tests:test-set-status! run-id 
					    test-id 
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items run-id test-id test-name #f))
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
	      )
	    (mutex-unlock! m)
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (if (not (vector-ref exit-info 1))
	    (debug:print 2 "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
	    (if (not (launch:einf-exit-status exit-info))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (launch:setup-for-run #!key (force #f))
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 
  ;; pass on that idea for now