Overview
Comment: | Extracting sheets to txtdb now works |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | dev |
Files: | files | file ages | folders |
SHA1: |
81649543cf84f528b5dcab1b27d6c492 |
User & Date: | matt on 2013-07-14 02:19:22 |
Other Links: | branch diff | manifest | tags |
Context
2013-07-14
| ||
02:23 | Extracting sheets to txtdb now works check-in: 9cacf1f935 user: matt tags: dev | |
02:19 | Extracting sheets to txtdb now works check-in: 81649543cf user: matt tags: dev | |
00:45 | Added snippets to txtdb that disentangle bits n pieces of xml from gnumeric file check-in: b8bf0ef6c4 user: matt tags: dev | |
Changes
Modified txtdb/txtdb.scm from [9e703b192e] to [8f826c3024].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | ;; Copyright 2006-2012, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. (use ssax) (use sxml-serializer) (use sxml-modifications) ;; Read a non-compressed gnumeric file (define (txtdb:read-gnumeric-xml fname) (with-input-from-file fname (lambda () (ssax:xml->sxml (current-input-port) '())))) | > > < > > > > > > > > > > > > > > > > > > > > > > > > > > > | > | | | < | > > > > > > > | > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > | > > > | > > > > > > > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | ;; Copyright 2006-2012, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. (use ssax) (use sxml-serializer) (use sxml-modifications) (use regex) (use srfi-69) ;; Read a non-compressed gnumeric file (define (txtdb:read-gnumeric-xml fname) (with-input-from-file fname (lambda () (ssax:xml->sxml (current-input-port) '())))) (define (find-section dat section #!key (depth 0)) (let loop ((hed (car dat)) (tal (cdr dat))) (if (list? hed) (let ((res (find-section hed section depth: (+ depth 1)))) (if res res (if (null? tal) #f (loop (car tal)(cdr tal))))) (if (eq? hed section) tal (if (null? tal) #f (loop (car tal)(cdr tal))))))) (define (remove-section dat section) (if (null? dat) '() (let loop ((hed (car dat)) (tal (cdr dat)) (res '())) (let ((newres (if (and (list? hed) (not (null? hed)) (equal? (car hed) section)) res (cons hed res)))) (if (null? tal) (reverse newres) (loop (car tal)(cdr tal) newres)))))) (define (list-sections dat) (filter (lambda (x)(and x)) (map (lambda (section) (if (and (list? section) (not (null? section))) (car section) #f)) dat))) (define (string->safe-filename str) (string-substitute (regexp " ") "_" str #t)) (define (sheet->txtdb dat targdir) (let ((sheet-name (string->safe-filename (car (find-section dat 'http://www.gnumeric.org/v10.dtd:Name)))) (cells (find-section dat 'http://www.gnumeric.org/v10.dtd:Cells)) (remaining (remove-section (remove-section dat 'http://www.gnumeric.org/v10.dtd:Name) 'http://www.gnumeric.org/v10.dtd:Cells)) (rownums (make-hash-table)) ;; num -> name (colnums (make-hash-table)) ;; num -> name (cols (make-hash-table))) ;; name -> ( (name val) ... ) (for-each (lambda (cell) (let ((rownum (string->number (car (find-section cell 'Row)))) (colnum (string->number (car (find-section cell 'Col)))) (valtype (let ((res (find-section cell 'ValueType))) (if res (car res) #f))) (value (let ((res (cdr (filter (lambda (x)(not (list? x))) cell)))) (if (null? res) "" (car res))))) ;; If colnum is 0 Then this is a row name, if rownum is 0 then this is a col name (cond ((eq? 0 colnum) ;; a blank in column zero is handled with the special name "row-N" (hash-table-set! rownums rownum (if (equal? value "") (conc "row-" rownum) value))) ((eq? 0 rownum) (hash-table-set! colnums colnum (if (equal? value "") (conc "col-" colnum) value))) (else (let ((colname (hash-table-ref/default colnums colnum (conc "col-" colnum))) (rowname (hash-table-ref/default rownums rownum (conc "row-" rownum)))) (hash-table-set! cols colname (cons (list rowname value) (hash-table-ref/default cols colname '())))))))) cells) (let ((ref-colnums (map (lambda (c) (list (cdr c)(car c))) (hash-table->alist colnums)))) (with-output-to-file (conc targdir "/" sheet-name ".dat") (lambda () (for-each (lambda (colname) (print "[" colname "]") (for-each (lambda (row) (print (car row) " " (cadr row))) (reverse (hash-table-ref cols colname))) (print)) (sort (hash-table-keys cols)(lambda (a b) (let ((colnum-a (assoc a ref-colnums)) (colnum-b (assoc b ref-colnums))) (if (and colnum-a colnum-b) (< (cadr colnum-a)(cadr colnum-b)) (if (and (string? a) (string? b)) (string< a b)))))))))))) (define (extract-txtdb dat targdir) (let* ((wrkbk (find-section dat 'http://www.gnumeric.org/v10.dtd:Workbook)) (wrk-rem (remove-section dat 'http://www.gnumeric.org/v10.dtd:Workbook)) (sheets (find-section wrkbk 'http://www.gnumeric.org/v10.dtd:Sheets)) (sht-rem (remove-section wrkbk 'http://www.gnumeric.org/v10.dtd:Sheets))) (create-directory targdir) (with-output-to-file (conc targdir "/workbook.sxml") (lambda () (pp wrk-rem))) (with-output-to-file (conc targdir "/sheets.sxml") (lambda () (pp sht-rem))) (for-each (lambda (sheet) (sheet->txtdb sheet targdir)) sheets))) #| (define x (txtdb:read-gnumeric-xml "testdata-stripped.xml")) |
︙ | ︙ |