Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -228,11 +228,12 @@ (begin (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc dbdat db params))) (if use-mutex (mutex-unlock! *db-with-db-mutex*)) ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat (stack-push! (dbr:subdb-dbstack subdb) dbdat)) + (if dbdat + (dbfile:add-dbdat dbstruct run-id dbdat)) res)) (exn (io-error) (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) (exn (corrupt) (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) @@ -433,13 +434,11 @@ ;; sync run from tmp disk to nfs disk if touched ;; (define (db:sync-touched dbstruct run-id #!key (force-sync #f)) (debug:print-info 0 *default-log-port* "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db")) - - (let* ( - (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id db:initialize-main-db))) + (let* ((subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id db:initialize-main-db))) (tmpdbfile (dbr:subdb-tmpdbfile subdb)) (mtdb (dbr:subdb-mtdbdat subdb)) (tmpdb (dbfile:open-sqlite3-db tmpdbfile #f)) (start-t (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) @@ -448,13 +447,12 @@ (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb mtdb)) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) - (stack-push! (dbr:subdb-dbstack subdb) tmpdb)) - #t -) + (dbfile:add-dbdat dbstruct run-id tmpdb) + #t)) ;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) @@ -1252,11 +1250,11 @@ (db:patch-schema-maindb (dbr:dbdat-dbh main-tmpdb)) (db:patch-schema-rundb (dbr:dbdat-dbh mtdb)) (db:patch-schema-rundb (dbr:dbdat-dbh main-tmpdb)) ) ) - (stack-push! (dbr:subdb-dbstack subdb) main-tmpdb)) + (dbfile:add-dbdat dbstruct #f main-tmpdb)) options))) (hash-table-values (dbr:dbstruct-subdbs dbstruct))) (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)) data-synced) ) @@ -1711,11 +1709,11 @@ "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND last_df > ?;") dneeded)) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + (dbfile:add-dbdat dbstruct #f dbdat) blocks)) ;; returns id of the record, register a disk allocated to archiving and record it's last known ;; available space ;; @@ -1732,19 +1730,19 @@ (if res ;; record exists, update df and return id (begin (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) WHERE archive_area_name=? AND disk_path=?;" df bdisk-name bdisk-path) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + (dbfile:add-dbdat dbstruct #f dbdat) res) (begin (sqlite3:execute db "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) VALUES (?,?,?);" bdisk-name bdisk-path df) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + (dbfile:add-dbdat dbstruct #f dbdat) (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) ;; record an archive path created on a given archive disk (identified by it's bdisk-id) ;; if path starts with / then it is full, otherwise it is relative to the archive disk ;; preference is to store the relative path. @@ -1767,11 +1765,11 @@ (begin (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) VALUES (?,?,?);" bdisk-id archive-path (or du 0)) (set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + (dbfile:add-dbdat dbstruct #f dbdat) res)) ;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id ;; @@ -5082,11 +5080,11 @@ (begin (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") (conc (current-directory) "/" outputfile))) results) ;; brutal clean up - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + (dbfile:add-dbdat dbstruct #f dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") ;;====================================================================== Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -23,11 +23,15 @@ ;; (declare (uses commonmod)) (module dbfile * -(import scheme chicken data-structures extras) + (import scheme + chicken + data-structures + extras) + (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 stack files @@ -181,30 +185,20 @@ ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (dbfile:setup do-sync areapath tmppath) (cond - (*dbstruct-dbs* *dbstruct-dbs*);; TODO: when multiple areas are supported, this optimization will be a hazard - (else ;;(common:on-homehost?) + (*dbstruct-dbs* + (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") + *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard + (else (let* ((dbstruct (make-dbr:dbstruct))) - #;(when (not *toppath*) - (debug:print-info 0 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") - (launch:setup areapath: areapath)) (set! *dbstruct-dbs* dbstruct) (dbr:dbstruct-areapath-set! dbstruct areapath) (dbr:dbstruct-tmppath-set! dbstruct tmppath) dbstruct)))) -#;(define (dbfile:get-subdb dbstruct run-id) - (let* ((res (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->key run-id) #f))) - (if res - res - (let* ((newsubdb (make-dbr:subdb))) - (db:open-db newsubdb run-id areapath: (dbr:dbstruct-areapath dbstruct) do-sync: #t) - (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->key run-id) newsubdb) - newsubdb)))) - (define (dbfile:get-subdb dbstruct run-id) (let* ((dbfname (dbfile:run-id->dbname run-id))) (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f))) (define (dbfile:set-subdb dbstruct run-id subdb) @@ -264,20 +258,32 @@ (tmpdbpath (dbfile:run-id->path tmppath run-id))) (dbfile:open-sqlite3-db tmpdbpath init-proc))))))) ;; COMBINE dbfile:open-sqlite-db and dbfile:lock-create-open ;; + +;; this stuff is for initial debugging, please remove it when +;; this code stabilizes +(define *dbopens* (make-hash-table)) +(define (dbfile:inc-db-open dbfile) + (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1))) + (if (> curr-opens-count 1) ;; this should NOT be happening + (dbfile:print-err "ERROR: db "dbfile" has been opened "curr-opens-count" times!")) + (hash-table-set! *dbopens* dbfile curr-opens-count) + curr-opens-count)) ;; Open the classic megatest.db file (defaults to open in toppath) ;; ;; NOTE: returns a dbdat not a dbstruct! ;; (define (dbfile:open-sqlite3-db dbpath init-proc) (let* ((dbexists (file-exists? dbpath)) (write-access (file-write-access? dbpath)) (db (dbfile:cautious-open-database dbpath init-proc))) #;(sqlite3:open-database dbpath) + (dbfile:inc-db-open dbpath) (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 10000)) + (sqlite3:execute db (conc "PRAGMA synchronous = 0;")) ;; (init-proc db) (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) (define (dbfile:print-and-exit . params) (with-output-to-port @@ -440,25 +446,78 @@ ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; -(define (dbfile:cautious-open-database fname init-proc #!optional (tries-left 10)) - (let* ((lock-file (conc fname".lock")) - (retry (lambda () - (thread-sleep! 1.1) - (if (> tries-left 0) - (dbfile:cautious-open-database fname init-proc (- tries-left 1)))))) - (assert (>= tries-left 0) (conc "FATAL: Five attempts in dbfile:cautious-open-database of "fname", giving up.")) - (if (and (file-write-access? fname) (not (dbfile:simple-file-lock lock-file))) +(define (dbfile:cautious-open-database fname init-proc #!optional (tries-left 50)) + (let* ((busy-file (conc fname"-journal")) + (delay-time (* (- 51 tries-left) 1.1)) + (retry (lambda () + (thread-sleep! delay-time) + (if (> tries-left 0) + (dbfile:cautious-open-database fname init-proc (- tries-left 1)))))) + (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) + (if (and (file-write-access? fname) + (file-exists? busy-file)) + (begin + (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file " busy-file " exists, trying again in few seconds.") + (thread-sleep! 1) + (if (eq? tries-left 2) + (begin + (dbfile:print-err "INFO: forcing journal rollup "busy-file) + (dbfile:brute-force-salvage-db fname))) + (dbfile:cautious-open-database fname init-proc (- tries-left 1))) + (let* ((db-exists (file-exists? fname)) + (result (condition-case + (let* ((db (sqlite3:open-database fname))) + (if (and init-proc (not db-exists)) + (init-proc db)) + db) + (exn (io-error) + (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") + (retry)) + (exn (corrupt) + (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") + (retry)) + (exn (busy) + (dbfile:print-err exn "ERROR: database " fname + " is locked. Try copying to another location, remove original and copy back.") + (retry)) + (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") + (retry)) + (exn () + (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " + ((condition-property-accessor 'exn 'message) exn)) + (retry))))) + #;(if (file-write-access? fname) + (dbfile:simple-file-release-lock lock-file)) + result)))) + +(define (dbfile:brute-force-salvage-db fname) + (let* ((backupfname (conc fname"-"(current-process-id)".bak")) + (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;") + "cp "backupfname" "fname))) + (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n" + " "cmd) + (system cmd))) + +(define (dbfile:cautious-open-database-orig fname init-proc #!optional (tries-left 50)) + (let* ((lock-file (conc fname".lock")) + (delay-time (* (- 51 tries-left) 1.1)) + (retry (lambda () + (thread-sleep! delay-time) + (if (> tries-left 0) + (dbfile:cautious-open-database fname init-proc (- tries-left 1)))))) + (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) + (if (and (file-write-access? fname) (not (dbfile:simple-file-lock lock-file expire-time: 3))) (begin - (dbfile:print-err "INFO: dbfile:cautious-open-database: lock file " lock-file " exists, trying again in 1 second.") + (dbfile:print-err "INFO: dbfile:cautious-open-database: lock file " lock-file " exists, trying again in few seconds.") (thread-sleep! 1) (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: stealing the lock "lock-file) - (delete-file lock-file))) + (delete-file* lock-file))) (dbfile:cautious-open-database fname init-proc (- tries-left 1))) (let* ((db-exists (file-exists? fname)) (result (condition-case (let* ((db (sqlite3:open-database fname))) (if (and init-proc (not db-exists))