Megatest

Changes On Branch 437eaccb58cd780c
Login

Changes In Branch ezsteps-refactor Through [437eaccb58] Excluding Merge-Ins

This is equivalent to a diff from 37efe35171 to 437eaccb58

2015-04-01
01:20
Merged in ezsteps refactor branch check-in: db2ccc3980 user: matt tags: v1.60
00:08
Added temp test for looking at failed killing of jobs check-in: 9fc571c903 user: matt tags: ezsteps-refactor
2015-03-31
22:55
Partial removal of bash script for ezsteps check-in: 437eaccb58 user: matt tags: ezsteps-refactor
08:24
Added makefile to batchsim check-in: 37efe35171 user: mrwellan tags: v1.60
2015-03-30
22:49
Improved recovery when communications goes bad in http transport check-in: 2c6622a2b7 user: matt tags: v1.60

Modified launch.scm from [2480988c90] to [b7c083578f].

9
10
11
12
13
14
15
16

17
18
19
20
21
22
23
;;  PURPOSE.

;;======================================================================
;; launch a task - this runs on the originating host, tests themselves
;;
;;======================================================================

(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3)

(import (prefix base64 base64:))
(import (prefix sqlite3 sqlite3:))

(declare (unit launch))
(declare (uses common))
(declare (uses configf))
(declare (uses db))







|
>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
;;  PURPOSE.

;;======================================================================
;; launch a task - this runs on the originating host, tests themselves
;;
;;======================================================================

(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables)

(import (prefix base64 base64:))
(import (prefix sqlite3 sqlite3:))

(declare (unit launch))
(declare (uses common))
(declare (uses configf))
(declare (uses db))
251
252
253
254
255
256
257
258


259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277

278
279
280
281
282
283
284
285
286
287
288
289
290
291


















292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
					     ;; check exit-info (vector-ref exit-info 1)
					     (if (vector-ref exit-info 1)
						 (let* ((stepname  (car ezstep))  ;; do stuff to run the step
							(stepinfo  (cadr ezstep))
							(stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo))
							(stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each 
							(stepcmd   (list-ref stepparts 3))
							(script    "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!


							(logpro-used #f))
						   ;; NB// can safely assume we are in test-area directory
						   (debug:print 4 "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
								" stepparms: " stepparms " stepcmd: " stepcmd)
						   
						   (if (file-exists? (conc stepname ".logpro"))(set! logpro-used #t))

						   ;; ;; first source the previous environment
						   ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
						   ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
						   ;;   (if (and prevstep (file-exists? prev-env))
						   ;;       (set! script (conc script "source " prev-env))))
						   
						   ;; call the command using mt_ezstep
						   (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))

						   (debug:print 4 "script: " script)
						   (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
						   ;; now launch

						   (let ((pid (process-run script)))
						     (rmt:test-set-top-process-pid run-id test-id pid)
						     (let processloop ((i 0))
						       (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
								   (mutex-lock! m)
								   (vector-set! exit-info 0 pid)
								   (vector-set! exit-info 1 exit-status)
								   (vector-set! exit-info 2 exit-code)
								   (mutex-unlock! m)
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   ))


















                                                     (let ((exinfo (vector-ref exit-info 2))
                                                           (logfna (if logpro-used (conc stepname ".html") "")))
						       (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna))
						     (if logpro-used
							 (rmt:test-set-log! run-id test-id (conc stepname ".html")))
						     ;; set the test final status
						     (let* ((this-step-status (cond
									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									       ((eq? (vector-ref exit-info 2) 0)                   'pass)
									       (else 'fail)))
							    (overall-status   (cond
									       ((eq? rollup-status 2) 'warn)
									       ((eq? rollup-status 0) 'pass)
									       (else 'fail)))
							    (next-status      (cond 
									       ((eq? overall-status 'pass) this-step-status)
									       ((eq? overall-status 'warn)
										(if (eq? this-step-status 'fail) 'fail 'warn))
									       (else 'fail)))
							    (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
							                       (cond
									       ((null? tal) ;; more to run?
									        "COMPLETED")
									       (else "RUNNING")))
							    )
						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (tests:test-set-status! run-id test-id next-state "WARN" 
									  (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									  #f))
							 ((pass)
							  (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
							  (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
						 (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 







|
>
>
|




<
<







|
|


|
>
|












|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







252
253
254
255
256
257
258
259
260
261
262
263
264
265
266


267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
					     ;; check exit-info (vector-ref exit-info 1)
					     (if (vector-ref exit-info 1)
						 (let* ((stepname  (car ezstep))  ;; do stuff to run the step
							(stepinfo  (cadr ezstep))
							(stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo))
							(stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each 
							(stepcmd   (list-ref stepparts 3))
							(script    "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
							(logpro-file (conc stepname ".logpro"))
							(html-file   (conc stepname ".html"))
							(logpro-used (file-exists? logpro-file)))
						   ;; NB// can safely assume we are in test-area directory
						   (debug:print 4 "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
								" stepparms: " stepparms " stepcmd: " stepcmd)
						   


						   ;; ;; first source the previous environment
						   ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
						   ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
						   ;;   (if (and prevstep (file-exists? prev-env))
						   ;;       (set! script (conc script "source " prev-env))))
						   
						   ;; call the command using mt_ezstep
						   ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
						   
						   (debug:print 4 "script: " script)
						   (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
						   ;; now launch the actual process
						   (let* ((cmd (conc stepcmd " > " stepname ".log"))
							  (pid (process-run cmd)))
						     (rmt:test-set-top-process-pid run-id test-id pid)
						     (let processloop ((i 0))
						       (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
								   (mutex-lock! m)
								   (vector-set! exit-info 0 pid)
								   (vector-set! exit-info 1 exit-status)
								   (vector-set! exit-info 2 exit-code)
								   (mutex-unlock! m)
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   )))
						   (debug:print-info 0 "step " stepname " completed with exit code " (vector-ref exit-info 2))
						   ;; now run logpro if needed
						   (if logpro-used
						       (let ((pid (process-run (conc "logpro " logpro-file " " (conc stepname ".html") " < " stepname ".log"))))
							 (let processloop ((i 0))
							   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
								       (mutex-lock! m)
								       ;; (vector-set! exit-info 0 pid)
								       (vector-set! exit-info 1 exit-status)
								       (vector-set! exit-info 2 exit-code)
								       (mutex-unlock! m)
								       (if (eq? pid-val 0)
									   (begin
									     (thread-sleep! 2)
									     (processloop (+ i 1))))
								       )
							   (debug:print-info 0 "logpro for step " stepname " exited with code " (vector-ref exit-info 2)))))
						   
						   (let ((exinfo (vector-ref exit-info 2))
							 (logfna (if logpro-used (conc stepname ".html") "")))
						     (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna))
						   (if logpro-used
						       (rmt:test-set-log! run-id test-id (conc stepname ".html")))
						   ;; set the test final status
						   (let* ((this-step-status (cond
									     ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									     ((eq? (vector-ref exit-info 2) 0)                   'pass)
									     (else 'fail)))
							  (overall-status   (cond
									     ((eq? rollup-status 2) 'warn)
									     ((eq? rollup-status 0) 'pass)
									     (else 'fail)))
							  (next-status      (cond 
									     ((eq? overall-status 'pass) this-step-status)
									     ((eq? overall-status 'warn)
									      (if (eq? this-step-status 'fail) 'fail 'warn))
									     (else 'fail)))
							  (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
							   (cond
							    ((null? tal) ;; more to run?
							     "COMPLETED")
							    (else "RUNNING")))
							  )
						     (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								  " this-step-status: " this-step-status " overall-status: " overall-status 
								  " next-status: " next-status " rollup-status: " rollup-status)
						     (case next-status
						       ((warn)
							(set! rollup-status 2)
							;; NB// test-set-status! does rdb calls under the hood
							(tests:test-set-status! run-id test-id next-state "WARN" 
										(if (eq? this-step-status 'warn) "Logpro warning found" #f)
										#f))
						       ((pass)
							(tests:test-set-status! run-id test-id next-state "PASS" #f #f))
						       (else ;; 'fail
							(set! rollup-status 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
							(tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
							))))
						 (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							  (not (null? tal)))
						     (loop (car tal) (cdr tal) stepname)))
					     (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep)))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds)