Megatest

Check-in [7624379dd7]
Login
Overview
Comment:Cherry pick from 93b72f20b1: Check if process still ACTUALLY running and if not go ahead and start the test
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | rebase-envprocessing
Files: files | file ages | folders
SHA1: 7624379dd7cdb9078f23c0275e9c73ab9be3f839
User & Date: mrwellan on 2016-04-28 08:35:29
Other Links: branch diff | manifest | tags
Context
2016-04-28
08:37
Cherry pick from c3569862dc: Fix couple misnamed calls check-in: 3298442080 user: mrwellan tags: rebase-envprocessing
08:35
Cherry pick from 93b72f20b1: Check if process still ACTUALLY running and if not go ahead and start the test check-in: 7624379dd7 user: mrwellan tags: rebase-envprocessing
2016-04-27
17:12
Added rebase script check-in: 5ffeb83ec4 user: mrwellan tags: rebase-envprocessing
Changes

Modified dashboard.scm from [53c344e229] to [bad0524744].

111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
111
112
113
114
115
116
117

118
119
120
121
122
123
124







-







		     (db:get-keys *dbstruct-local*)))

(define *dbkeys*  (append *keys* (list "runname")))

(define *header*       #f)
(define *allruns*     '())
(define *allruns-by-id* (make-hash-table)) ;; 
(define *runchangerate* (make-hash-table))

(define *buttondat*    (make-hash-table)) ;; <run-id color text test run-key>
(define *alltestnamelst* '())
(define *searchpatts*  (make-hash-table))
(define *num-runs*      8)
(define *tot-run-count* (if *useserver*
			    (rmt:get-num-runs "%")
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
303
304
305
306
307
308
309




310
311
312
313
314
315
316







-
-
-
-







		  ;; Not sure this is needed?
		  (set! referenced-run-ids (cons run-id referenced-run-ids))
		  (if (> (length tests) maxtests)
		      (set! maxtests (length tests)))
		  (if (or (not *hide-empty-runs*) ;; this reduces the data burden when set
			  (not (null? tests)))
		      (let ((dstruct (vector run tests key-vals (- (current-seconds) 10))))
			;;
			;; compare the tests with the tests in *allruns-by-id* same run-id 
			;; if different then increment value in *runchangerate*
			;;
			(hash-table-set! *allruns-by-id* run-id dstruct)
			(set! result (cons dstruct result))))))
	      runs)

    (set! *header*  header)
    (set! *allruns* result)
    (debug:print-info 6 "*allruns* has " (length *allruns*) " runs")

Modified fsl-rebase.scm from [d386c1a856] to [d4dd53982d].

1
2
3
4
5
6
7
8
9
10
11
12
13
14


15
16
17
18
19
20
21
22


23
24
25








26
27
28



1
2
3
4
5
6
7
8
9
10
11
12


13
14
15
16
17
18
19
20
21

22
23
24
25
26
27
28
29
30
31
32
33
34



35
36
37












-
-
+
+







-
+
+



+
+
+
+
+
+
+
+
-
-
-
+
+
+
;; given branch and baseline commit generate list of commands to cherry pick commits
;;
;;
;; Usage: fsl-rebase basecommit branch
;;         

(use regex posix)

(let* ((basecommit (cadr (argv)))
       (branch     (caddr (argv)))
       (cmd        (conc "fossil timeline after " basecommit " -n 1000000 -W 0"))
       (theregex   (conc ;; "^[^\\]]+"
			 ;; "\\[([\\]]+)\\]\\s+"
			 ;; "(.*)"
			 "\\[([a-z0-9]+)\\]\\s+"
			 "(.*)"
			 "\\s+\\(.*tags:\\s+" branch 
			 ;; ".*\\)"
			 )))
  (print "basecommit: " basecommit ", branch: " branch ", theregex: " theregex ", cmd: \"" cmd "\"")
  (with-input-from-pipe
   cmd
   (lambda ()
     (let loop ((inl (read-line)))
     (let loop ((inl (read-line))
		(res '()))
       (if (not (eof-object? inl))
	   (let ((have-match (string-search theregex inl)))
	     (if have-match
		 (loop (read-line)
		       (cons (conc "fossil merge --cherrypick " (cadr have-match)
				   "\nfossil commit -m \"Cherry pick from " (cadr have-match)
				   ": " (caddr have-match) "\"")
			     res))
		 (loop (read-line) res)))
	   (map print res))))))

		 (print "match: " inl)
		 (print "no match: " theregex " " inl))
	     (loop (read-line))))))))
;; (print "match: " inl "\n   $1: " (cadr have-match) " $2: " (caddr have-match))
;; (print "no match: " theregex " " inl))
;; (loop (read-line))))))))

Modified launch.scm from [80a89ca909] to [8e89190962].

267
268
269
270
271
272
273
274



275
276
277
278




279
280
281
282
283
284
285
267
268
269
270
271
272
273

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291







-
+
+
+




+
+
+
+







	    (set-signal-handler! signal/stop sighand))
	  
	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
	  (let* ((test-info (rmt:get-testinfo-state-status run-id test-id))
		 (test-host (db:test-get-host       test-info))
		 (test-pid  (db:test-get-process_id test-info)))
	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process-alive-on-host? test-host test-pid)
		  (debug:print 0 "ERROR: test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))

Modified process.scm from [99891d384e] to [7162768cf7].

145
146
147
148
149
150
151
152

















153
154
155
156
157
158
159
145
146
147
148
149
150
151

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175







-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (handle-exceptions
   exn
   ;; possibly pid is a process not a child, look in /proc to see if it is running still
   (file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))
	 

(define (process:alive-on-host? host pid)
  (let ((cmd (conc "ssh " host " ps -o pid= -p " pid)))
    (handle-exceptions
     exn
     #f ;; anything goes wrong - assume the process in NOT running.
     (with-input-from-pipe 
      cmd
      (lambda ()
	(let loop ((inl (read-line)))
	  (if (eof-object? inl)
	      #f
	      (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl))
		     (innum     (string->number clean-str)))
		(and innum
		     (eq? pid innum))))))))))

(define (process:get-sub-pids pid)
  (with-input-from-pipe
   (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid)
   (lambda ()
     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)