Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -242,11 +242,11 @@ ;; return a previously opened db handle to the stack of available handles (define (dbfile:add-dbdat dbstruct run-id dbdat) (let* ((subdb (dbfile:get-subdb dbstruct run-id)) (age (- (current-seconds)(dbr:dbdat-birth-sec dbdat)))) - (if (> age 30) ;; just testing - discard and close after 30 sec + (if (> age 300) ;; just testing - discard and close after 30 sec (begin ;; (map sqlite3:finalize! (hash-table-values (dbr:dbdat-stmt-cache dbdat))) ;; (sqlite3:finalize! (dbr:dbdat-dbh dbdat)) (dbfile:print-err "INFO: Discarded dbdat over 30 sec old ("age"s)")) (begin @@ -501,46 +501,37 @@ (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: forcing journal rollup "busy-file) (dbfile:brute-force-salvage-db fname))) (dbfile:cautious-open-database fname init-proc (- tries-left 1))) - (let* ((db-exists (file-exists? fname)) - (result (condition-case - (let* ((is-no-sync (substring-index "no-sync.db" fname)) - (nosyncdb *no-sync-db*) - (lockname (conc fname ".lock")) - (db (begin - (dbfile:simple-file-lock-and-wait lockname expire-time: 5) - (if (and (not is-no-sync) - nosyncdb) - (db:no-sync-get-lock nosyncdb fname)) - (sqlite3:open-database fname)))) - (if (and init-proc (not db-exists)) - (init-proc db)) - (if (and (not is-no-sync) - nosyncdb) - (db:no-sync-del! nosyncdb fname)) - (dbfile:simple-file-release-lock lockname) - db) - (exn (io-error) - (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") - (retry)) - (exn (corrupt) - (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") - (retry)) - (exn (busy) - (dbfile:print-err exn "ERROR: database " fname - " is locked. Try copying to another location, remove original and copy back.") - (retry)) - (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") - (retry)) - (exn () - (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " - ((condition-property-accessor 'exn 'message) exn)) - (retry))))) + (let* ((result (condition-case + (dbfile:with-simple-file-lock + (conc fname ".lock") + (lambda () + (let* ((db-exists (file-exists? fname)) + (db (sqlite3:open-database fname))) + (if (and init-proc (not db-exists)) + (init-proc db)) + db))) + (exn (io-error) + (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") + (retry)) + (exn (corrupt) + (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") + (retry)) + (exn (busy) + (dbfile:print-err exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.") + (retry)) + (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") + (retry)) + (exn () + (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)) + (retry))))) #;(if (file-write-access? fname) - (dbfile:simple-file-release-lock lock-file)) + (dbfile:simple-file-release-lock lock-file)) result)))) (define (dbfile:brute-force-salvage-db fname) (let* ((backupfname (conc fname"-"(current-process-id)".bak")) (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;") @@ -646,22 +637,21 @@ (define (db:no-sync-get-lock db keyname) (sqlite3:with-transaction db (lambda () (condition-case - `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)) - + (let* ((curr-val (db:no-sync-get/default db keyname #f))) + (if curr-val + `(#f . ,curr-val) ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)) + (let ((lock-time (current-seconds))) + (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) + `(#t . ,lock-time)))) (exn (io-error) (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again.")) (exn (corrupt) (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed.")) (exn (busy) (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back.")) (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem.")) (exn () ;; (status done) ;; I don't know how to detect status done but no data! - (let ((lock-time (current-seconds))) - ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) - (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) - `(#t . ,lock-time))) - #;(exn () (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n" ((condition-property-accessor 'exn 'message) exn)) `(#f . ,(current-seconds))))))) (define (db:no-sync-get-lock-timeout db keyname timeout) @@ -674,10 +664,21 @@ (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)) lockdat)) (else lockdat)))) +;; NOTE: This will steal the lock after timeout of waiting. +;; +(define (db:with-no-sync-lock db keyname timeout proc) + (let* ((lockdat (db:no-sync-get-lock-timeout db keyname)) + (gotlock (car lockdat)) + (locktime (cdr lockdat))) + (if gotlock + (let ((res (proc))) + (db:no-sync-del! db keyname) + res)))) + ;;====================================================================== ;; sync back functions pulled from db.scm ;;====================================================================== ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f @@ -1316,7 +1317,14 @@ (handle-exceptions exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) - +(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300)) + (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))) + (if gotlock + (let ((res (proc))) + (dbfile:simple-file-release-lock fname) + res) + (assert #t "FATAL: simple file lock never got a lock.")))) + )