Megatest

Changes On Branch 158702d7aab0a0b0
Login

Changes In Branch sqlite-trials Excluding Merge-Ins

This is equivalent to a diff from fd82e8252e to 158702d7aa

2014-03-27
11:14
Merged sqlite-trials work to v1.55 check-in: 2b3cd8f3ca user: mrwellan tags: v1.55, v1.5516rc2
11:13
Tweaked timeouts and added more agressive exception handling to sqlite3 calls. Also cd to MT_RUN_AREA_HOME in dashboard if run in a test enviroment Closed-Leaf check-in: 158702d7aa user: mrwellan tags: sqlite-trials
2014-03-26
22:17
Trial work on exception handling for sqlite issues check-in: 846b99e992 user: mrwellan tags: sqlite-trials
2014-03-25
12:30
Bumped version. check-in: fd82e8252e user: icfadm tags: v1.55, v1.5516rc1
10:05
Speculative fix for the toplevel problem check-in: f8e4667eee user: matt tags: v1.55

Modified dashboard.scm from [a8322f10a6] to [18a5f67c75].

171
172
173
174
175
176
177


178
179
180
181
182
183
184
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186







+
+








(define uidat #f)

(define-inline (dboard:uidat-get-keycol  vec)(vector-ref vec 0))
(define-inline (dboard:uidat-get-lftcol  vec)(vector-ref vec 1))
(define-inline (dboard:uidat-get-header  vec)(vector-ref vec 2))
(define-inline (dboard:uidat-get-runsvec vec)(vector-ref vec 3))

(if (get-environment-variable "MT_RUN_AREA_HOME")(change-directory (get-environment-variable "MT_RUN_AREA_HOME")))

(define (message-window msg)
  (iup:show
   (iup:dialog
    (iup:vbox 
     (iup:label msg #:margin "40x40")))))

Modified db.scm from [820999b8e9] to [174cb68951].

71
72
73
74
75
76
77
78

79
80
81
82
83
84
85
71
72
73
74
75
76
77

78
79
80
81
82
83
84
85







-
+







	    (exit))))
  (let* ((dbpath    (conc *toppath* "/megatest.db")) ;; fname)
	 (dbexists  (file-exists? dbpath))
	 (write-access (file-write-access? dbpath))
	 (db        (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
					   (string->number (args:get-arg "-override-timeout"))
					   136000)))) ;; 136000))) ;; 136000 = 2.2 minutes
					   6000)))) ;; NB// this is in milliseconds. 136000))) ;; 136000 = 2.2 minutes
    (if (and dbexists
	     (not write-access))
	(set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
    (debug:print-info 11 "open-db, dbpath=" dbpath " argv=" (argv))
    (if write-access (sqlite3:set-busy-handler! db handler))
    (if (not dbexists)
	(db:initialize db))
103
104
105
106
107
108
109



110
111
112
113
114
115
116











117
118
119
120

121
122
123
124
125
126
127
103
104
105
106
107
108
109
110
111
112







113
114
115
116
117
118
119
120
121
122
123
124
125
126

127
128
129
130
131
132
133
134







+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+



-
+







	(debug:print-info 11 "open-run-close-no-exception-handling END" )
	res)
      #f))

(define (open-run-close-exception-handling proc idb . params)
  (handle-exceptions
   exn
   (let ((sleep-time (random 30))
	 (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     (case err-status
   (begin
     (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
     (debug:print 0 "  " ((condition-property-accessor 'exn 'message) exn))
     (print-call-chain)
     (thread-sleep! (random 120))
     (debug:print-info 0 "trying db call one more time....")
     (apply open-run-close-no-exception-handling proc idb params))
       ((busy)
	(thread-sleep! sleep-time))
       (else
	(debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
	(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	(print "exn=" (condition->list exn))
	(debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	(print-call-chain)
	(thread-sleep! sleep-time)
	(debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
     (apply open-run-close-exception-handling proc idb params))
   (apply open-run-close-no-exception-handling proc idb params)))

;; (define open-run-close open-run-close-exception-handling)
(define open-run-close open-run-close-no-exception-handling)
(define open-run-close open-run-close-exception-handling)

(define *global-delta* 0)
(define *last-global-delta-printed* 0)

(define (open-run-close-measure  proc idb . params)
  (debug:print-info 11 "open-run-close-measure START, idb=" idb ", params=" params)
  (let* ((start-ms (current-milliseconds))
269
270
271
272
273
274
275

276

277
278
279
280
281
282
283
276
277
278
279
280
281
282
283

284
285
286
287
288
289
290
291







+
-
+







	   (debug:print 2 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test"
			((condition-property-accessor 'exn 'message) exn))
	   (set! db (sqlite3:open-database ":memory:"))) ;; open an in-memory db to allow readonly access 
	 (set! db (sqlite3:open-database dbpath)))
	(if *db-write-access* (sqlite3:set-busy-handler! db handler))
	(if (not dbexists)
	    (begin
	      ;; Why use FULL here? This data is not that critical
	      (sqlite3:execute db "PRAGMA synchronous = FULL;")
	      ;; (sqlite3:execute db "PRAGMA synchronous = FULL;")
	      (debug:print-info 11 "Initialized test database " dbpath)
	      (db:testdb-initialize db)))
	;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	(debug:print-info 11 "open-test-db END (sucessful)" work-area)
	;; now let's test that everything is correct
	(handle-exceptions
	 exn
1727
1728
1729
1730
1731
1732
1733




1734
1735
1736



1737
1738
1739
1740
1741
1742
1743
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745



1746
1747
1748
1749
1750
1751
1752
1753
1754
1755







+
+
+
+
-
-
-
+
+
+








;; db should be db open proc or #f
(define (cdb:remote-run proc db . params)
  (if (or *db-write-access*
	  (not (member proc *db:all-write-procs*)))
      (handle-exceptions
       exn
       (let ((sleep-time (random 20))
	     (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	 (case err-status
	   ((busy)(thread-sleep! 4))
       (begin 
	 (debug:print 0 "Problem with call to cdb:remote-run, database may be locked and read-only, waiting and trying again ...")
	 (thread-sleep! 10)
	   (else
	    (debug:print 0 "WARNING: possible problem with call to cdb:remote-run, database may be read-only and locked, waiting and trying again ...")
	    (thread-sleep! sleep-time)))
	 (apply cdb:remote-run proc db params))
       (apply cdb:client-call *runremote* 'immediate #f *default-numtries* open-run-close proc #f params))
      (begin
	(debug:print 0 "ERROR: Attempt to access read-only database")
	#f)))

(define (db:test-get-logfile-info db run-id test-name)
1943
1944
1945
1946
1947
1948
1949





















1950




1951
1952
1953
1954
1955
1956
1957
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982

1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+







    (if query
	;; hand queries off to the write queue
	(let ((response (case *transport-type*
			  ((http)
			   (debug:print-info 7 "Queuing item " item " for wrapped write")
			   (db:queue-write-and-wait db qry-sig query params))
			  (else 
			   (let* ((remtries 10)
				  (proc     #f))
			     (set! proc (lambda (remtries)
					  (if (> remtries 0)
					      (handle-exceptions
					       exn
					       (let ((sleep-time (random 30))
						     (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
						 (case err-status
						   ((busy)
						    (thread-sleep! sleep-time)
						    (proc 10)) ;; we never give up on busy
						   (else
						    (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
						    (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
						    (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status)  exn))
						    (print-call-chain)
						    (debug:print 0 "Sleeping for " sleep-time)
						    (thread-sleep! sleep-time)
						    (debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")
						    (proc (- remtries 1)))))
			   (apply sqlite3:execute db query params)
					       (apply sqlite3:execute db query params))
					      (debug:print 0 "ERROR: too many attempts to access db were made and no sucess. query: "
							   query ", params: " params))))
			     (proc remtries))
			   #t))))
	  (debug:print-info 7 "Received " response " from wrapped write")
	  (server:reply return-address qry-sig response response))
	;; otherwise if appropriate flush the queue (this is a read or complex query)
	(begin
	  (cond
	   ((member stmt-key db:special-queries)

Modified runs.scm from [16fd05bb4c] to [d043a7c00f].

459
460
461
462
463
464
465


466
467
468
469





470
471
472
473
474
475
476
477

478
479
480
481
482
483
484
459
460
461
462
463
464
465
466
467




468
469
470
471
472
473
474
475
476
477
478
479

480
481
482
483
484
485
486
487







+
+
-
-
-
-
+
+
+
+
+







-
+







             ;; (notinqueue (filter (lambda (x)
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member testmode '(toplevel))
	(for-each (lambda (prereq)
		    (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			(set! give-up #t)))
		  prereqstrs)
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id hed "")))
		(mt:test-set-state-status-by-id test-id "DEQUED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
		(mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)