Megatest

Check-in [d25ec76b5c]
Login
Overview
Comment:Removed some unused code. Upped rest time from 12 seconds to 30 seconds for runners in parallel
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65 | v1.6562
Files: files | file ages | folders
SHA1: d25ec76b5c7208f7e1489951b5f56ef56ba5d434
User & Date: mrwellan on 2020-08-18 12:53:21
Other Links: branch diff | manifest | tags
Context
2020-08-18
17:06
Bumped version check-in: bdbca349cb user: mrwellan tags: v1.65, v1.6562
12:53
Removed some unused code. Upped rest time from 12 seconds to 30 seconds for runners in parallel check-in: d25ec76b5c user: mrwellan tags: v1.65, v1.6562
12:13
updates and fixes to archive megatestdb check-in: 47e78ace28 user: pjhatwal tags: v1.65
Changes

Modified runs.scm from [eb3b96455d] to [b44d99af31].

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
  reglen regfull
  runname max-concurrent-jobs run-id
  test-patts required-tests test-registry
  registry-mutex flags keyvals run-info all-tests-registry
  can-run-more-tests
  ((can-run-more-tests-count 0) : fixnum)
  (last-fuel-check         0)  ;; time when we last checked fuel
;;  (last-runners-count        #f)  ;; 
;;  (runner-registered         #f)  ;; have I registered myself?
;;  (run-skip-count             0)  ;; how many times have I skipped running sequentially
  (runners-mgmt-mode          'rest-mode)
  )

(defstruct runs:testdat
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal itemmaps prereqs-not-met)
  
;; Fourth try, do accounting through time
;;
(define (runs:parallel-runners-mgmt-4 rdat)
  (let ((time-to-check 10) ;; 28
	(time-to-wait  12)
	(now-time      (current-seconds)))
    (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check
	(let* ((fuel-used          (or (rmt:get-var "runners-fuel") now-time)))
	  ;; initialize and sanitize values if needed
	  (if (> fuel-used (+ now-time 1)) ;; are we over-drawn? If so, kill time, do not add time to fuel used
	      (begin ;; gonna rest
		(debug:print-info 0 *default-log-port* "Runner load high, taking a break.")
		(thread-sleep! time-to-wait)
		(runs:dat-last-fuel-check-set! rdat (current-seconds)) ;; work done from here (i.e. seconds of "fuel") will be added to fuel-used 
		)
	      (begin ;; no fuel deficit, back to work
		(rmt:set-var "runners-fuel" (+ now-time time-to-check))
		))))))

;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files
;;  - remove any that are over 3600 seconds old
;;  - if there are any that are younger than 10 seconds
;;      * sleep 10 seconds
;;      * touch my key-host-pid.softlock file
;;      * return
;;  - if there are no files younger than 10 seconds







|
<
<
<







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







46
47
48
49
50
51
52
53



54
55
56
57
58
59
60



















61
62
63
64
65
66
67
  reglen regfull
  runname max-concurrent-jobs run-id
  test-patts required-tests test-registry
  registry-mutex flags keyvals run-info all-tests-registry
  can-run-more-tests
  ((can-run-more-tests-count 0) : fixnum)
  (last-fuel-check         0)  ;; time when we last checked fuel
  (beginning-of-time       (current-seconds))



  )

(defstruct runs:testdat
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal itemmaps prereqs-not-met)
  



















;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files
;;  - remove any that are over 3600 seconds old
;;  - if there are any that are younger than 10 seconds
;;      * sleep 10 seconds
;;      * touch my key-host-pid.softlock file
;;      * return
;;  - if there are no files younger than 10 seconds
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
		  (close-output-port ouf))))
	  (runs:dat-last-fuel-check-set! rdat (current-seconds))))))
  
;; Fourth try, do accounting through time
;;
(define (runs:parallel-runners-mgmt rdat)
  (let ((time-to-check 10) ;; 28
	(time-to-wait  12)
	(now-time      (current-seconds)))
    (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check
	(runs:wait-on-softlock rdat "runners"))))

;; To test parallel-runners management start a repl:
;;  megatest -repl
;; then run:







|







101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
		  (close-output-port ouf))))
	  (runs:dat-last-fuel-check-set! rdat (current-seconds))))))
  
;; Fourth try, do accounting through time
;;
(define (runs:parallel-runners-mgmt rdat)
  (let ((time-to-check 10) ;; 28
	(time-to-wait  30)
	(now-time      (current-seconds)))
    (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check
	(runs:wait-on-softlock rdat "runners"))))

;; To test parallel-runners management start a repl:
;;  megatest -repl
;; then run:
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
	      (thread-sleep! work-time)
	      (set! rtime (+ rtime work-time))
	      ((or proc runs:parallel-runners-mgmt) rdat)
	      (loop)))))
    (let* ((done-time (current-seconds)))
      (debug:print-info 0 *default-log-port* "DONE: rtime=" rtime ", elapsed time=" (- done-time startt)
			", ratio=" (/ rtime (- done-time startt))))))

;; ;; Third try, use a running average
;; ;;
;; ;;    ADD A COUNT OF TIMES CYCLED THROUGH REST MODE
;; ;;
;; ;;  runners-mgmt-mode 
;; ;;
;; (define (runs:parallel-runners-mgmt-3 rdat)
;;   (let ((time-to-check 2.8) ;; 28
;; 	(time-to-wait  3.0))
;;     (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check
;; 	(let* ((skip-count         (runs:dat-run-skip-count rdat))
;; 	       (mgmt-mode          (runs:dat-runners-mgmt-mode rdat)) ;;
;; 	       (num-registered     (rmt:get-var "num-runners"))
;; 	       (last-runners-count (if (runs:dat-last-runners-count rdat)
;; 				       (runs:dat-last-runners-count rdat)
;; 				       (or num-registered 1)))
;; 	       (last-runners-ravg  (/ (+ last-runners-count num-registered) 2)#;(if (> num-registered last-runners-count)
;; 				       (/ (+ last-runners-count num-registered) 2)
;; 				       (/ (+ (* num-registered 4) last-runners-count) 5) ;; slow on down
;; 				       ))     ;; running average
;; 	       )
;; 	  ;; initialize and sanitize values if needed
;; 	  (cond
;; 	   ((not num-registered) ;; first in, initialize to 1
;; 	    (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is not defined")
;; 	    (rmt:set-var "num-runners" 1))
;; 	   ((< num-registered 1) ;; this should not be, reset to 1 to make it less confusing
;; 	    (debug:print-info 0 *default-log-port* " adjusting num-runners up to 1, currently it is " num-registered)
;; 	    (rmt:set-var "num-runners" 1)))
;; 	  (if (not (member mgmt-mode '(rest-mode work-mode)))
;; 	      (begin
;; 		(debug:print-info 0 *default-log-port* " setting mgmt-mode to work-mode, currently it is " mgmt-mode)
;; 		(rmt:inc-var "num-runners")
;; 		(set! last-runners-ravg (+ last-runners-ravg 1))
;; 		(runs:dat-runners-mgmt-mode-set! rdat 'rest-mode)))
;; 	  
;; 	  (runs:dat-last-runners-count-set! rdat last-runners-ravg)
;; 	  ;; to rest or not rest?
;; 	  (if (and (< skip-count 5)
;; 		   (> num-registered last-runners-count)) ;;(+ last-runners-ravg 0.5))) ;; there seem to be other runners out there
;; 	      (begin ;; gonna rest
;; 		(debug:print-info 0 *default-log-port* "Too many running, num-registered=" num-registered ", ravg=" last-runners-ravg
;; 				  ", real num runners=" (rmt:get-var "num-runners") ", skip-count=" skip-count)
;; 		(if (eq? mgmt-mode 'work-mode)
;; 		    (rmt:dec-var "num-runners"))
;; 		(runs:dat-runners-mgmt-mode-set! rdat 'rest-mode)
;; 		(runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1))
;; 		(thread-sleep! time-to-wait)
;; 		(runs:parallel-runners-mgmt-3 rdat)
;; 		)
;; 	      (begin
;; 		(runs:dat-run-skip-count-set! rdat 0)
;; 		(if (eq? mgmt-mode 'rest-mode)
;; 		    (rmt:inc-var "num-runners")) ;; going into work mode if not already in work mode
;;  		(runs:dat-runners-mgmt-mode-set! rdat 'work-mode)
;; 		(debug:print-info 0 *default-log-port* "All good, keep running, num-registered="
;; 				  num-registered ", ravg=" last-runners-ravg ", mode=" mgmt-mode
;; 				   ", skip-count=" skip-count))
;; 	      )))))

;; (define (runs:print-parallel-runners-state state num-registered last-registered skip-count)
;;   (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state
;; 		    ", num-registered=" num-registered ", last-registered=" last-registered
;; 		    ", skip-count=" skip-count))
;; 
;; (define (runs:print-parallel-runners-state2 state num-registered last-runners-count skip-count)
;;   (debug:print-info 0 *default-log-port* "runs:parallel-runners-mgmt, state=" state
;; 		    ", num-registered=" num-registered ", last-runners-count=" last-runners-count
;; 		    ", skip-count=" skip-count))
;; 
;; ;; Second try
;; ;;
;; (define (runs:parallel-runners-mgmt-2 rdat)
;;   (let ((time-to-check 2.8) ;; 28
;; 	(time-to-wait  3.0))
;;     (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check
;; 	(let* ((num-registered     (or (rmt:get-var "num-runners")        0))
;; 	       (last-runners-count (runs:dat-last-runners-count rdat))
;; 	       (skip-count      (runs:dat-run-skip-count rdat)))
;; 	  (cond
;; 	   ;; first time in
;; 	   ((not last-runners-count)
;; 	    (runs:print-parallel-runners-state2 "A" num-registered last-runners-count skip-count)
;; 	    (if (eq? num-registered 0)
;; 		(rmt:set-var "num-runners" 1)
;; 		(rmt:inc-var "num-runners"))
;; 	    (runs:dat-last-runners-count-set! rdat num-registered)
;; 	    (runs:dat-run-skip-count-set! rdat 0))
;; 	   ;; too many waits, decrement num-runners, reset last-runners and continue on
;; 	   ((> (runs:dat-run-skip-count rdat) 3) 
;; 	    (runs:print-parallel-runners-state2 "B" num-registered last-runners-count skip-count)
;; 	    (rmt:dec-var "num-runners")
;; 	    (runs:dat-run-skip-count-set! rdat 0)
;; 	    (runs:dat-last-runners-count-set! rdat num-registered))
;; 	    ;; too many running, take a break
;; 	   ((> num-registered last-runners-count) ;; (+ last-runners-count 1))
;; 	    (runs:print-parallel-runners-state2 "C" num-registered last-runners-count skip-count)
;; 	    (rmt:dec-var "num-runners")
;; 	    (debug:print-info 0 *default-log-port*
;; 			      "Too many running (" num-registered
;; 			      "), last-count=" last-runners-count " waiting " time-to-wait " seconds ... ")
;; 	    (thread-sleep! time-to-wait)
;; 	    (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1))
;; 	    ;; adjust down last-runners-count
;; 	    (if (< num-registered last-runners-count)
;; 		(runs:dat-last-runners-set! rdat num-running))
;; 	    (rmt:inc-var "num-runners")
;; 	    )
;; 	   ;; we have been in waiting mode, do not increment again as we already did that
;; 	   ((> skip-count 0)
;; 	    (runs:print-parallel-runners-state2 "D" num-registered last-runners-count skip-count)
;; 	    (runs:dat-run-skip-count-set! rdat 0)
;; 	    ;; (runs:dat-last-runners-count-set! rdat num-registered)
;; 	    )
;; 	   ;; skip count is zero, not too many running, this is transition into running
;; 	   (else
;; 	    (runs:print-parallel-runners-state2 "E" num-registered last-runners-count skip-count)
;; 	    ;; (rmt:inc-var "num-runners")
;; 	    #;(runs:dat-run-skip-count-set! rdat 0)))))))

;; (define (runs:parallel-runners-mgmt rdat)
;;   (let ((time-to-check 2.8) ;; 28
;; 	(time-to-wait  3.0))
;;   (if (> (- (current-seconds) (runs:dat-last-runners-check rdat)) time-to-check) ;; time to check
;;       (let* ((num-registered  (or (rmt:get-var "num-runners")        0))
;; 	     (last-registered (or (rmt:get-var "runner-change-time") 0))
;; 	     (skip-count      (runs:dat-run-skip-count rdat)))
;; 	(cond
;; 	 ;; consider this the beginning of time
;; 	 ((eq? num-registered 0)
;; 	  (runs:print-parallel-runners-state "A" num-registered last-registered skip-count)
;; 	  (rmt:set-var "num-runners" 1) ;; potential bug - not ACID
;; 	  (rmt:set-var "runner-change-time" (current-seconds))
;; 	  (runs:dat-last-runners-check-set! rdat (current-seconds))
;; 	  (runs:dat-runner-registered-set!  rdat #t)
;; 	  (runs:dat-run-skip-count-set!     rdat 0))
;; 	 ;; have headroom to run another
;; 	 ((< num-registered 3)
;; 	  (runs:print-parallel-runners-state "B" num-registered last-registered skip-count)
;; 	  (rmt:inc-var "num-runners")
;; 	  (rmt:set-var "runner-change-time" (current-seconds))
;; 	  (runs:dat-last-runners-check-set! rdat (current-seconds))
;; 	  (runs:dat-run-skip-count-set!     rdat 0))
;; 	 ;; we've waited too many rounds, gonna force a round
;; 	 ((> (runs:dat-run-skip-count rdat) 3)
;; 	  (runs:print-parallel-runners-state "C" num-registered last-registered skip-count)
;; 	  (rmt:set-var "num-runners" 1)
;; 	  ;; (rmt:set-var "runner-change-time" (current-seconds))
;; 	  (runs:dat-last-runners-check-set! rdat (current-seconds))
;; 	  (runs:dat-run-skip-count-set!     rdat 0))
;; 	 ;; have too many runners working, but this is the first time to wait since doing some work
;; 	 ((eq? (runs:dat-run-skip-count rdat) 0) ;; and num-registered is >= 3
;; 	  (runs:print-parallel-runners-state "D" num-registered last-registered skip-count)
;; 	  (if (not (eq? (runs:dat-last-runners-check rdat) 0)) ;; do not decrement if we've never incremented
;; 	      (begin
;; 		(rmt:dec-var "num-runners")
;; 		#;(rmt:set-var "runner-change-time" (current-seconds))))
;; 	  (runs:dat-last-runners-check-set! rdat (current-seconds))
;; 	  (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1))
;; 	  (debug:print-info 0 *default-log-port* "Too many runners working (" num-registered
;; 			    "). Resting for 30 seconds.")
;; 	  (thread-sleep! time-to-wait)
;; 	  (runs:parallel-runners-mgmt rdat))
;;          ;; ok, keep waiting
;; 	 (else
;; 	  (runs:print-parallel-runners-state "E" num-registered last-registered skip-count)
;; 	  (thread-sleep! time-to-wait)
;; 	  (runs:dat-run-skip-count-set! rdat (+ (runs:dat-run-skip-count rdat) 1))
;; 	  (runs:parallel-runners-mgmt rdat)))))))


(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







130
131
132
133
134
135
136











































































































































































137
138
139
140
141
142
143
	      (thread-sleep! work-time)
	      (set! rtime (+ rtime work-time))
	      ((or proc runs:parallel-runners-mgmt) rdat)
	      (loop)))))
    (let* ((done-time (current-seconds)))
      (debug:print-info 0 *default-log-port* "DONE: rtime=" rtime ", elapsed time=" (- done-time startt)
			", ratio=" (/ rtime (- done-time startt))))))












































































































































































(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)

357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
         (cons  (car key) (cadr key)))
       (keys:target->keyval (rmt:get-keys) target))
    
    ,@(map (lambda (var)
             (let ((val (configf:lookup *configdat* "env-override" var)))
               (cons var val)))
           (configf:section-vars *configdat* "env-override"))))


    

    
    
;; set up needed environment variables given a run-id and optionally a target, itempath etc.
;;
(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  (let* ((target    (or intarget 
			(common:args-get-target)







<
<
<
<
<







164
165
166
167
168
169
170





171
172
173
174
175
176
177
         (cons  (car key) (cadr key)))
       (keys:target->keyval (rmt:get-keys) target))
    
    ,@(map (lambda (var)
             (let ((val (configf:lookup *configdat* "env-override" var)))
               (cons var val)))
           (configf:section-vars *configdat* "env-override"))))





    
;; set up needed environment variables given a run-id and optionally a target, itempath etc.
;;
(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  (let* ((target    (or intarget 
			(common:args-get-target)