Megatest

Check-in [42fa11c691]
Login
Overview
Comment:Added retry on remove when blocked by toplevel with not-yet-removed child tests
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: 42fa11c69110d54778095cbab8b9aeaf0cbedefd
User & Date: matt on 2014-06-17 00:07:33
Other Links: branch diff | manifest | tags
Context
2014-06-17
10:36
Remove in mem db for monitor.db - it did not work right in read-only context check-in: 02c7d8cb87 user: mrwellan tags: v1.55
00:07
Added retry on remove when blocked by toplevel with not-yet-removed child tests check-in: 42fa11c691 user: matt tags: v1.55
2014-06-16
15:38
Added support for building with older iup on RedHad/CentOS check-in: c6380019ef user: mrwellan tags: v1.55
Changes

Modified runs.scm from [d796cced81] to [a35316fdbf].

1361
1362
1363
1364
1365
1366
1367

1368
1369
1370
1371
1372
1373
1374
		   (else
		    (debug:print-info 0 "action not recognised " action)))
		 (let ((sorted-tests     (sort tests (lambda (a b)(let ((dira (db:test-get-rundir a))
									(dirb (db:test-get-rundir b)))
								    (if (and (string? dira)(string? dirb))
									(> (string-length dira)(string-length dirb))
									#f)))))

		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (cdb:get-test-info-by-id *runremote* test-id)))
		       (if (not new-test-dat)







>







1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
		   (else
		    (debug:print-info 0 "action not recognised " action)))
		 (let ((sorted-tests     (sort tests (lambda (a b)(let ((dira (db:test-get-rundir a))
									(dirb (db:test-get-rundir b)))
								    (if (and (string? dira)(string? dirb))
									(> (string-length dira)(string-length dirb))
									#f)))))
		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (cdb:get-test-info-by-id *runremote* test-id)))
		       (if (not new-test-dat)
1383
1384
1385
1386
1387
1388
1389

1390






1391
1392
1393
1394
1395
1396
1397
				  (uname         (db:test-get-uname    new-test-dat))
				  (toplevel-with-children (and (db:test-get-is-toplevel test)
							       (> (cdb:remote-run db:test-toplevel-num-items db run-id test-name) 0))))
			     (case action
			       ((remove-runs)
				;; if the test is a toplevel-with-children issue an error and do not remove
				(if toplevel-with-children

				    (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")






				    (begin
				      (debug:print-info 0 "test: " test-name " itest-state: " test-state)
				      (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
					  (begin
					    (if (not (hash-table-ref/default test-retry-time test-fulln #f))
						(begin
						  ;; want to set to REMOVING BUT CANNOT do it here?







>
|
>
>
>
>
>
>







1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
				  (uname         (db:test-get-uname    new-test-dat))
				  (toplevel-with-children (and (db:test-get-is-toplevel test)
							       (> (cdb:remote-run db:test-toplevel-num-items db run-id test-name) 0))))
			     (case action
			       ((remove-runs)
				;; if the test is a toplevel-with-children issue an error and do not remove
				(if toplevel-with-children
				    (begin
				      (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
				      (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
				      (if (> (hash-table-ref toplevel-retries test-fulln) 3)
					  (if (not (null? tal))
					      (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
					  (let ((newtal (append tal (list test))))
					    (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
				    (begin
				      (debug:print-info 0 "test: " test-name " itest-state: " test-state)
				      (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
					  (begin
					    (if (not (hash-table-ref/default test-retry-time test-fulln #f))
						(begin
						  ;; want to set to REMOVING BUT CANNOT do it here?

tests/installall/config/megatest.config.dat became a symlink with target [736a5da885].

tests/installall/config/runconfigs.config.dat became a symlink with target [3b8f260acb].