Overview
Comment: | Corrected exit delays, added attempted repair on discovering a corrupted db |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
d125f9632a36ba7019e7d90fa22863df |
User & Date: | matt on 2015-05-28 00:18:04 |
Other Links: | branch diff | manifest | tags |
Context
2015-05-28
| ||
22:55 | Added basis for test of legacy vs. current db check-in: 0457c113cf user: matt tags: v1.60 | |
06:57 | Added basic template for getting changed runs. Fixed exit handling, exit promptly after cleanup check-in: 33295b94b3 user: matt tags: v1.60 | |
00:18 | Corrected exit delays, added attempted repair on discovering a corrupted db check-in: d125f9632a user: matt tags: v1.60 | |
2015-05-27
| ||
11:40 | Removed exit from on-exit call. check-in: b4a4de8e9e user: mrwellan tags: v1.60, v1.6014a | |
Changes
Modified common.scm from [9be5018f3c] to [005a48a275].
︙ | ︙ | |||
279 280 281 282 283 284 285 | (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) (vector-set! *task-db* 0 #f)))))) "Cleanup db exit thread")) (th2 (make-thread (lambda () (debug:print 4 "Attempting clean exit. Please be patient and wait a few seconds...") (if no-hurry | | | | | 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) (vector-set! *task-db* 0 #f)))))) "Cleanup db exit thread")) (th2 (make-thread (lambda () (debug:print 4 "Attempting clean exit. Please be patient and wait a few seconds...") (if no-hurry (thread-sleep! 0.1) ;; give the clean up few seconds to do it's stuff (thread-sleep! 4)) (debug:print 4 " ... done") ) "clean exit"))) (thread-start! th2) (thread-start! th1) (thread-join! th2)))) (define (std-signal-handler signum) |
︙ | ︙ |
Modified db.scm from [5f7db1b5aa] to [9ce696701e].
︙ | ︙ | |||
160 161 162 163 164 165 166 | (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";")))) ;; open an sql database inside a file lock ;; ;; returns: db existed-prior-to-opening ;; (define (db:lock-create-open fname initproc) | | | | | | | | < | > > > > | | | | | | | | | | | 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";")))) ;; open an sql database inside a file lock ;; ;; returns: db existed-prior-to-opening ;; (define (db:lock-create-open fname initproc) ;; (if (file-exists? fname) ;; (let ((db (sqlite3:open-database fname))) ;; (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) ;; (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; db) (let* ((parent-dir (pathname-directory fname)) (dir-writable (file-write-access? parent-dir)) (file-exists (file-exists? fname)) (file-write (if file-exists (file-write-access? fname) dir-writable ))) (if file-write ;; dir-writable (let ((lock (obtain-dot-lock fname 1 5 10)) (db (sqlite3:open-database fname))) (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") (if (not file-exists)(initproc db)) (release-dot-lock fname) db) (begin (debug:print 0 "ERROR: no such db in non-writable dir " fname) (sqlite3:open-database fname))))) ;; ) ;; This routine creates the db. It is only called if the db is not already opened ;; (define (db:open-rundb dbstruct run-id #!key (attemptnum 0)(do-not-open #f)) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) (let* ((local (dbr:dbstruct-get-local dbstruct)) (rdb (if local (dbr:dbstruct-get-localdb dbstruct run-id) |
︙ | ︙ | |||
484 485 486 487 488 489 490 | '("description" #f) '("reviewed" #f) '("iterated" #f) '("avg_runtime" #f) '("avg_disk" #f) '("tags" #f) '("jobgroup" #f))))) | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > | | | | | 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 | '("description" #f) '("reviewed" #f) '("iterated" #f) '("avg_runtime" #f) '("avg_disk" #f) '("tags" #f) '("jobgroup" #f))))) ;; use bunch of Unix commands to try to break the lock and recreate the db ;; (define (db:move-and-recreate-db dbdat) (let* ((dbpath (db:dbdat-get-path dbdat)) (dbdir (pathname-directory dbpath)) (fname (pathname-strip-directory dbpath)) (fnamejnl (conc fname "-journal")) (tmpname (conc fname "." (current-process-id))) (tmpjnl (conc fnamejnl "." (current-process-id)))) (debug:print 0 "ERROR: " fname " appears corrupted. Making backup \"old/" fname "\"") (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) (system (conc "rm -f " dbpath)) (if (file-exists? fnamejnl) (begin (debug:print 0 "ERROR: " fnamejnl " found, moving it to old dir as " tmpjnl) (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) (system (conc "rm -f " dbdir "/" fnamejnl)))) ;; attempt to recreate database (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname)))) ;; return #f to indicate the dbdat should be closed/reopened ;; else return dbdat ;; (define (db:repair-db dbdat #!key (numtries 1)) (let* ((dbpath (db:dbdat-get-path dbdat)) (dbdir (pathname-directory dbpath)) (fname (pathname-strip-directory dbpath))) (debug:print-info 0 "Checking db " dbpath " for errors.") (cond ((not (file-write-access? dbdir)) (debug:print 0 "WARNING: can't write to " dbdir ", can't fix " fname) #f) ;; handle special cases, megatest.db and monitor.db ;; ;; NOPE: apply this same approach to all db files ;; (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed (handle-exceptions exn (begin (db:move-and-recreate-db dbdat) (if (> numtries 0) (db:repair-db dbdat numtries: (- numtries 1)) #f) (debug:print 0 "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.") (debug:print 0 " check the following:\n" " 1. full directories, look in ~/ /tmp and " dbdir "\n" " 2. write access to " dbdir "\n\n" " if the automatic recovery failed you may be able to recover data by doing \"" (if (member fname '("megatest.db" "monitor.db")) "megatest -cleanup-db" "megatest -import-megatest.db;megatest -cleanup-db") "\"\n") (exit) ;; we can not safely continue when a db was corrupted - even if fixed. ) ;; test read/write access to the database (let ((db (sqlite3:open-database dbpath))) (cond ((equal? fname "megatest.db") (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';")) ((equal? fname "main.db") (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';")) ((string-match "\\d.db" fname) (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';")) ((equal? fname "monitor.db") (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) (else (sqlite3:execute db "vacuum;"))) (finalize! db) #t)))))) ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; db's are dbdat's ;; (define (db:sync-tables tbls fromdb todb . slave-dbs) (mutex-lock! *db-sync-mutex*) (handle-exceptions exn (begin (mutex-unlock! *db-sync-mutex*) (debug:print 0 "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") (print-call-chain (current-error-port)) (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (debug:print 0 " src db: " (db:dbdat-get-path fromdb)) (for-each (lambda (dbdat) (let ((dbpath (db:dbdat-get-path dbdat))) (debug:print 0 " dbpath: " dbpath) (if (not (db:repair-db dbdat)) (begin (debug:print 0 "ERROR: Failed to rebuild " dbpath ", exiting now.") (exit))))) (cons todb slave-dbs)) 0) ;; (if *server-run* ;; we are inside a server, throw a sync-failed error ;; (signal (make-composite-condition ;; (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context."))) ;; 0)) ;; return zero for num synced ;; (set! *time-to-exit* #t) ;; let watch dog know that it is time to die. ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") ;; (portlogger:open-run-close portlogger:set-port port "released") ;; (exit 1))) (cond ((not fromdb) (debug:print 3 "WARNING: db:sync-tables called with fromdb missing") -1) |
︙ | ︙ |