Megatest

Check-in [f2dc94dec8]
Login
Overview
Comment:Be more agressive about removing lockdb when things go poorly
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | static-html
Files: files | file ages | folders
SHA1: f2dc94dec817b5c78bea25b9425d3e6fed633109
User & Date: matt on 2015-03-03 20:50:04
Other Links: branch diff | manifest | tags
Context
2015-03-03
22:25
speculative additional fix for html update lock issue check-in: 8aa804b793 user: matt tags: static-html
20:50
Be more agressive about removing lockdb when things go poorly check-in: f2dc94dec8 user: matt tags: static-html
2015-03-02
21:55
Peek at lock - if stuff in flight, don't bother getting in the queue check-in: c8adfd5902 user: matt tags: static-html
Changes

Modified lock-queue.scm from [759ae6ed41] to [4e7622a7f2].

24
25
26
27
28
29
30




31
32
33
34
35
36
37
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41







+
+
+
+







;;======================================================================

(define (make-lock-queue:db-dat)(make-vector 3))
(define-inline (lock-queue:db-dat-get-db        vec)    (vector-ref  vec 0))
(define-inline (lock-queue:db-dat-get-path      vec)    (vector-ref  vec 1))
(define-inline (lock-queue:db-dat-set-db!       vec val)(vector-set! vec 0 val))
(define-inline (lock-queue:db-dat-set-path!     vec val)(vector-set! vec 1 val))

(define (lock-queue:delete-lock-db dbdat)
  (let ((fname (lock-queue:db-dat-get-path dbdat)))
    (system (conc "rm -f " fname "*"))))

(define (lock-queue:open-db fname #!key (count 10))
  (let* ((actualfname (conc fname ".lockdb"))
	 (dbexists (file-exists? actualfname))
	 (db       (sqlite3:open-database actualfname))
	 (handler  (make-busy-timeout 136000)))
    (if dbexists
85
86
87
88
89
90
91
92

93
94


95
96
97
98
99
100
101
89
90
91
92
93
94
95

96
97

98
99
100
101
102
103
104
105
106







-
+

-
+
+







(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10))
  ;; no need to wait on journal on read only queries
  ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200)
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 "WARNING: exception on lock-queue:any-younger. Trying again in 30 seconds.")
	 (debug:print 0 "WARNING: exception on lock-queue:any-younger. Removing lockdb and trying again in 5 seconds.")
	 (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (thread-sleep! 5)
         (lock-queue:delete-lock-db dbdat)
	 (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1)))
       (begin
	 (debug:print 0 "ERROR:  Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (let ((res #f))
     (sqlite3:for-each-row
      (lambda (tid)
116
117
118
119
120
121
122
123
124




125
126
127
128
129
130
131
121
122
123
124
125
126
127


128
129
130
131
132
133
134
135
136
137
138







-
-
+
+
+
+







	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds")
	      (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      (if (> count 0)
		  (lock-queue:get-lock dbdat test-id count: (- count 1)))
	      #f)
		  (lock-queue:get-lock dbdat test-id count: (- count 1))
		  (begin ;; never recovered, remote the lock file and return #f, no lock obtained
		    (lock-queue:delete-lock-db dbdat)
		    #f)))
	    (sqlite3:with-transaction
	     db
	     (lambda ()
	       (sqlite3:for-each-row (lambda (tid lockstate)
				       (set! res (list tid lockstate)))
				     lckqry)
	       (if res