Megatest

Check-in [e799a787ef]
Login
Overview
Comment:One failure to get lock to open db go ahead and try, reset counters of api calls using count of threads, some threads might die and are unable to unregister themselves.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: e799a787ef3836c8bd39e7adcb8fde70c4eea1a0
User & Date: matt on 2023-05-22 20:41:51
Other Links: branch diff | manifest | tags
Context
2023-05-22
21:38
Couple untested fixes check-in: 4a2131ba1d user: matt tags: v1.80
20:41
One failure to get lock to open db go ahead and try, reset counters of api calls using count of threads, some threads might die and are unable to unregister themselves. check-in: e799a787ef user: matt tags: v1.80
17:50
Cherry pick ab049 check-in: 64f486e05b user: matt tags: v1.80
Changes

Modified api.scm from [cc67cf0a85] to [00015c9c1f].

215
216
217
218
219
220
221


















222
223
224
225
226
227
228
229
230
231
232

233
234

235
236
237
238
239
240
241
242
243
244






245
246
247
248
249
250
251
;;               (ok-res . #t)))
;; 	      (vector #f res))
;;             (begin
;;               #;(common:telemetry-log (conc "api-out:"(->string cmd))
;;               payload: `((params . ,params)
;;               (ok-res . #f)))
;;               (vector #t res))))))))



















;; indat is (cmd run-id params meta)
;;
;; WARNING: Do not print anything in the lambda of this function as it
;;          reads/writes to current in/out port
;;
(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params)
  (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.")
  (if (not *server-signature*)
      (set! *server-signature* (tt:mk-signature *toppath*)))
  (lambda (indat)

    (let* (;; (indat      (deserialize))
	   (newcount   (+ *api-process-request-count* 1))

	   (delay-wait (if (> newcount 10)
			   (- newcount 10)
			   0))
	   (normal-proc (lambda (cmd run-id params)
			  (case cmd
			    ((ping) *server-signature*)
			    (else
			     (api:dispatch-request dbstruct cmd run-id params))))))
      (set! *api-process-request-count* newcount)
      (set! *db-last-access* (current-seconds))






      (match indat
	((cmd run-id params meta)
	 (let* ((db-ok  (let* ((dbfname (dbmod:run-id->dbfname run-id))
			       (ok      (equal? dbfname (dbr:dbstruct-dbfname dbstruct))))
			  (case cmd
			    ((ping) #t) ;; we are fine
			    (else







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











>


>










>
>
>
>
>
>







215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
;;               (ok-res . #t)))
;; 	      (vector #f res))
;;             (begin
;;               #;(common:telemetry-log (conc "api-out:"(->string cmd))
;;               payload: `((params . ,params)
;;               (ok-res . #f)))
;;               (vector #t res))))))))

(define *api-threads* '())
(define (api:register-thread th-in)
  (set! *api-threads* (cons (cons th-in (current-seconds)) *api-threads*)))

(define (api:unregister-thread th-in)
  (set! *api-threads* (filter (lambda (thdat)
				(not (eq? th-in (car thdat))))
			      *api-threads*)))

(define (api:remove-dead-or-terminated)
  (set! *api-threads* (filter (lambda (thdat)
				(not (member (thread-state (car thdat)) '(terminated dead))))
			      *api-threads*)))

(define (api:get-count-threads-alive)
  (length *api-threads*))
  

;; indat is (cmd run-id params meta)
;;
;; WARNING: Do not print anything in the lambda of this function as it
;;          reads/writes to current in/out port
;;
(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params)
  (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.")
  (if (not *server-signature*)
      (set! *server-signature* (tt:mk-signature *toppath*)))
  (lambda (indat)
    (api:register-thread (current-thread))
    (let* (;; (indat      (deserialize))
	   (newcount   (+ *api-process-request-count* 1))
	   (numthreads (api:get-count-threads-alive))
	   (delay-wait (if (> newcount 10)
			   (- newcount 10)
			   0))
	   (normal-proc (lambda (cmd run-id params)
			  (case cmd
			    ((ping) *server-signature*)
			    (else
			     (api:dispatch-request dbstruct cmd run-id params))))))
      (set! *api-process-request-count* newcount)
      (set! *db-last-access* (current-seconds))
      (if (not (eq? newcount numthreads))
	  (begin
	    (api:remove-dead-or-terminated)
	    (let ((threads-now (api:get-count-threads-alive)))
	      (debug:print 0 *default-log-port* "WARNING: newcount="newcount", numthreads="numthreads", remaining="threads-now)
	      (set! newcount threads-now))))
      (match indat
	((cmd run-id params meta)
	 (let* ((db-ok  (let* ((dbfname (dbmod:run-id->dbfname run-id))
			       (ok      (equal? dbfname (dbr:dbstruct-dbfname dbstruct))))
			  (case cmd
			    ((ping) #t) ;; we are fine
			    (else
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282

283
284
285
286
287
288
289
			   ((loaded) (conc "Server loaded, "newcount" threads in flight"))
			   (else     #f)))
		(result  (case status
			   ((busy)
			    (if (eq? cmd 'ping)
				(normal-proc cmd run-id params)
				;; newcount must be greater than 5 for busy
				(* 0.25 (- newcount 3)) ;; was 15
				)) ;; (- newcount 29)) ;; call back in as many seconds
			   ((loaded)
;; 			    (if (eq? (rmt:transport-mode) 'tcp)
;; 				(thread-sleep! 0.5))
			    (normal-proc cmd run-id params))
			   (else
			    (normal-proc cmd run-id params))))
		(meta   (case cmd
			  ((ping) `((sstate . ,server-state)))
			  (else   `((wait . ,delay-wait)))))
		(payload (list status errmsg result meta)))
	   (set! *api-process-request-count* (- *api-process-request-count* 1))
	   ;; (serialize payload)

	   payload))
	(else
	 (assert #f "FATAL: failed to deserialize indat "indat))))))
       

(define (api:dispatch-request dbstruct cmd run-id params)
  (if (not *no-sync-db*)







|













>







288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
			   ((loaded) (conc "Server loaded, "newcount" threads in flight"))
			   (else     #f)))
		(result  (case status
			   ((busy)
			    (if (eq? cmd 'ping)
				(normal-proc cmd run-id params)
				;; newcount must be greater than 5 for busy
				(* 1 (- newcount 3)) ;; was 15
				)) ;; (- newcount 29)) ;; call back in as many seconds
			   ((loaded)
;; 			    (if (eq? (rmt:transport-mode) 'tcp)
;; 				(thread-sleep! 0.5))
			    (normal-proc cmd run-id params))
			   (else
			    (normal-proc cmd run-id params))))
		(meta   (case cmd
			  ((ping) `((sstate . ,server-state)))
			  (else   `((wait . ,delay-wait)))))
		(payload (list status errmsg result meta)))
	   (set! *api-process-request-count* (- *api-process-request-count* 1))
	   ;; (serialize payload)
	   (api:unregister-thread (current-thread))
	   payload))
	(else
	 (assert #f "FATAL: failed to deserialize indat "indat))))))
       

(define (api:dispatch-request dbstruct cmd run-id params)
  (if (not *no-sync-db*)

Modified dbfile.scm from [10490588b1] to [b9d134230d].

1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421


1422
1423
1424
1425
1426
1427
1428

(define (dbfile:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))

(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300))
  (let ((start-time (current-seconds))
        (gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))
        (end-time (current-seconds))
        )
    (if gotlock
	(let ((res (proc)))
	  (dbfile:simple-file-release-lock fname)
	  res)
        (begin
          (dbfile:print-err "dbfile:with-simple-file-lock: " fname " is locked by "
			    (with-input-from-file fname
			      (lambda ()
				(dbfile:print-err (read-line)))))
          (dbfile:print-err "wait time = " (- end-time start-time))
	  (dbfile:print-err "ERROR: simple file lock could not get a lock for " fname " in " expire-time " seconds")
          #f
        )
    )
  )
)




(define *get-cache-stmth-mutex* (make-mutex))

(define (db:get-cache-stmth dbdat db stmt)
  (mutex-lock! *get-cache-stmth-mutex*)
  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))







|















|
|
<
<
<
|
>
>







1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417



1418
1419
1420
1421
1422
1423
1424
1425
1426
1427

(define (dbfile:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))

(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300)(run-anyway #f))
  (let ((start-time (current-seconds))
        (gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))
        (end-time (current-seconds))
        )
    (if gotlock
	(let ((res (proc)))
	  (dbfile:simple-file-release-lock fname)
	  res)
        (begin
          (dbfile:print-err "dbfile:with-simple-file-lock: " fname " is locked by "
			    (with-input-from-file fname
			      (lambda ()
				(dbfile:print-err (read-line)))))
          (dbfile:print-err "wait time = " (- end-time start-time))
	  (dbfile:print-err "ERROR: simple file lock could not get a lock for " fname " in " expire-time " seconds")
	  (if run-anyway
	      (let ((res (proc)))



		(dbfile:simple-file-release-lock fname)
		res)
	      #f)))))

(define *get-cache-stmth-mutex* (make-mutex))

(define (db:get-cache-stmth dbdat db stmt)
  (mutex-lock! *get-cache-stmth-mutex*)
  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))

Modified dbmod.scm from [88ea4fc563] to [a5cc78531f].

173
174
175
176
177
178
179
180

181
182
183
184
185
186
187
     (let* ((dbexists (file-exists? dbfullname))
	    (db       (sqlite3:open-database dbfullname))
	    (handler  (sqlite3:make-busy-timeout 136000)))
       (sqlite3:set-busy-handler! db handler)
       (if (and dbexists
		write-access)
	   (init-proc db))
       db))))


(define *sync-in-progress* #f)

;; Open the cachedb db and the on-disk db
;; populate the cachedb db with data
;;
;; Updates fields in dbstruct







|
>







173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
     (let* ((dbexists (file-exists? dbfullname))
	    (db       (sqlite3:open-database dbfullname))
	    (handler  (sqlite3:make-busy-timeout 136000)))
       (sqlite3:set-busy-handler! db handler)
       (if (and dbexists
		write-access)
	   (init-proc db))
       db))
   run-anyway: #t))

(define *sync-in-progress* #f)

;; Open the cachedb db and the on-disk db
;; populate the cachedb db with data
;;
;; Updates fields in dbstruct