Megatest

Check-in [93b72f20b1]
Login
Overview
Comment:Check if process still ACTUALLY running and if not go ahead and start the test
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60_defunct
Files: files | file ages | folders
SHA1: 93b72f20b1c470c320092ef2c272a0255f5dedcb
User & Date: matt on 2016-03-06 23:21:45
Other Links: branch diff | manifest | tags
Context
2016-03-07
08:36
Fix couple misnamed calls check-in: c3569862dc user: mrwellan tags: v1.60_defunct
2016-03-06
23:21
Check if process still ACTUALLY running and if not go ahead and start the test check-in: 93b72f20b1 user: matt tags: v1.60_defunct
20:52
Merged envprocessing into v1.60 check-in: a1d77f1a3b user: matt tags: v1.60_defunct
Changes

Modified dashboard.scm from [53c344e229] to [bad0524744].

111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
		     (db:get-keys *dbstruct-local*)))

(define *dbkeys*  (append *keys* (list "runname")))

(define *header*       #f)
(define *allruns*     '())
(define *allruns-by-id* (make-hash-table)) ;; 
(define *runchangerate* (make-hash-table))

(define *buttondat*    (make-hash-table)) ;; <run-id color text test run-key>
(define *alltestnamelst* '())
(define *searchpatts*  (make-hash-table))
(define *num-runs*      8)
(define *tot-run-count* (if *useserver*
			    (rmt:get-num-runs "%")







<







111
112
113
114
115
116
117

118
119
120
121
122
123
124
		     (db:get-keys *dbstruct-local*)))

(define *dbkeys*  (append *keys* (list "runname")))

(define *header*       #f)
(define *allruns*     '())
(define *allruns-by-id* (make-hash-table)) ;; 


(define *buttondat*    (make-hash-table)) ;; <run-id color text test run-key>
(define *alltestnamelst* '())
(define *searchpatts*  (make-hash-table))
(define *num-runs*      8)
(define *tot-run-count* (if *useserver*
			    (rmt:get-num-runs "%")
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
		  ;; Not sure this is needed?
		  (set! referenced-run-ids (cons run-id referenced-run-ids))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (if (or (not *hide-empty-runs*) ;; this reduces the data burden when set
			  (not (null? tests)))
		      (let ((dstruct (vector run tests key-vals (- (current-seconds) 10))))
			;;
			;; compare the tests with the tests in *allruns-by-id* same run-id 
			;; if different then increment value in *runchangerate*
			;;
			(hash-table-set! *allruns-by-id* run-id dstruct)
			(set! result (cons dstruct result))))))
	      runs)

    (set! *header*  header)
    (set! *allruns* result)
    (debug:print-info 6 "*allruns* has " (length *allruns*) " runs")







<
<
<
<







303
304
305
306
307
308
309




310
311
312
313
314
315
316
		  ;; Not sure this is needed?
		  (set! referenced-run-ids (cons run-id referenced-run-ids))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (if (or (not *hide-empty-runs*) ;; this reduces the data burden when set
			  (not (null? tests)))
		      (let ((dstruct (vector run tests key-vals (- (current-seconds) 10))))




			(hash-table-set! *allruns-by-id* run-id dstruct)
			(set! result (cons dstruct result))))))
	      runs)

    (set! *header*  header)
    (set! *allruns* result)
    (debug:print-info 6 "*allruns* has " (length *allruns*) " runs")

Modified launch.scm from [e9781be215] to [d455976d72].

267
268
269
270
271
272
273
274


275
276
277
278




279
280
281
282
283
284
285
	    (set-signal-handler! signal/stop sighand))
	  
	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))


	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running




	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))







|
>
>




>
>
>
>







267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
	    (set-signal-handler! signal/stop sighand))
	  
	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (rmt:get-testinfo-state-status run-id test-id))
		 (test-host (db:test-get-host       test-info))
		 (test-pid  (db:test-get-process_id test-info)))
	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process-alive-on-host? test-host test-pid)
		  (debug:print 0 "ERROR: test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))

Modified process.scm from [99891d384e] to [7162768cf7].

145
146
147
148
149
150
151
152
















153
154
155
156
157
158
159
  (handle-exceptions
   exn
   ;; possibly pid is a process not a child, look in /proc to see if it is running still
   (file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))
	 
















(define (process:get-sub-pids pid)
  (with-input-from-pipe
   (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid)
   (lambda ()
     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
  (handle-exceptions
   exn
   ;; possibly pid is a process not a child, look in /proc to see if it is running still
   (file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))

(define (process:alive-on-host? host pid)
  (let ((cmd (conc "ssh " host " ps -o pid= -p " pid)))
    (handle-exceptions
     exn
     #f ;; anything goes wrong - assume the process in NOT running.
     (with-input-from-pipe 
      cmd
      (lambda ()
	(let loop ((inl (read-line)))
	  (if (eof-object? inl)
	      #f
	      (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl))
		     (innum     (string->number clean-str)))
		(and innum
		     (eq? pid innum))))))))))

(define (process:get-sub-pids pid)
  (with-input-from-pipe
   (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid)
   (lambda ()
     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)