Megatest

artifacts.scm at [5ba2bbf0ed]
Login

File artifacts/artifacts.scm artifact b5b4746c14 part of check-in 5ba2bbf0ed


;; Copyright 2006-2017, Matthew Welland.
;; 
;; This file is part of artifacts
;; 
;;     Pkts is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Pkts is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Pkts.  If not, see <http://www.gnu.org/licenses/>.
;;

;; CARDS:
;;
;; A card is a line of text, the first two characters are a letter followed by a
;;   space. The letter is the card type.
;;
;; artifact:
;;
;; An artifact is a sorted list of cards with a final card Z that contains the shar1 hash
;;   of all of the preceding cards.
;;
;; AARTIFACT:
;;
;;  An alist mapping card types to card data
;;      '((T . "artifacttype")
;;        (a . "some content"))
;;
;; EARTIFACT:
;;
;;  Extended packet using friendly keys. Must use a artifactspec to convert to/from eartifacts
;;    '((ptype . "artifacttype")
;;      (adata . "some content))
;;
;; DARTIFACT:
;;
;; artifacts pulled from the database have this format:
;;
;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b")     <= this is a the alist
;;       (t . "v1.63/tip/dev")
;;       (c . "QUICKPATT")
;;       (T . "runstart")
;;       (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd")
;;       (D . "1488995096.0"))
;;  (id . 8)
;;  (group-id . 0)
;;  (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b")
;;  (parent . "")
;;  (artifact-type . "runstart")
;;  (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b"))
;;
;; artifactspec is alist of alists mapping types and nicekeys to keys
;;
;; '((posting . ((title . t)
;;               (url   . u)
;;               (blurb . b)))
;;   (comment . ((comment . c)
;;               (score   . s))))

;; Reserved cards:
;;   P      : artifact parent
;;   R      : reference artifact containing mapping of short string -> sha1sum strings
;;   T      : artifact type
;;   D      : current time from (current-time), unless provided
;;   Z      : shar1 hash of the packet

;; Example usage:
;;
;; Create a artifact:
;;
;; (use artifacts)
;; (define-values (uuid artifact)
;;     (alist->artifact
;;       '((fruit . "apple") (meat . "beef"))  ;; this is the data to convert
;;       '((foods (fruit . f) (meat . m)))     ;; this is the artifact spec
;;       ptype:
;;       'foods))
;;
;; Add to artifact queue:
;;
;; (define db (open-queue-db "/tmp/artifacts" "artifacts.db"))
;; (add-to-queue db artifact uuid 'foods #f 0) ;; no parent and use group_id of 0
;;
;; Retrieve the packet from the db and extract a value:
;;
;; (alist-ref
;;    'meat
;;    (dartifact->alist
;;         (car (get-dartifacts db #f 0 #f))
;;        '((foods (fruit . f)
;;                 (meat . m)))))
;; => "beef"
;;

(module artifacts
(
;; cards, util and misc
;; sort-cards
;; calc-sha1
;;
;; low-level constructor procs, exposed only for development/testing, will be removed
construct-sdat
construct-artifact     
card->type/value  
add-z-card

;; queue database procs
open-queue-db
add-to-queue
create-and-queue
;; lookup-by-uuid
lookup-by-id
get-dartifacts
get-not-processed-artifacts
get-related
find-artifacts
process-artifacts
get-descendents
get-ancestors
get-artifacts
;; get-last-descendent
;; with-queue-db
;; load-artifacts-to-db

;; procs that operate directly on artifacts, sdat, aartifacts, dartifacts etc.
artifact->alist    ;; artifact -> aartifact (i.e. alist)
artifact->sdat     ;; artifact -> '("a aval" "b bval" ...)
sdat->alist   ;; '("a aval" "b bval"...) -> ((a . "aval")(b . "bval") ...)
dblst->dartifacts  ;; convert list of tuples from queue db into dartifacts
dartifact->alist   ;; flatten a dartifact into an alist containing all db fields and the artifact alist
dartifacts->alists ;; apply dartifact->alist to a list of alists using a artifact-spec
alist->artifact    ;; returns two values uuid, artifact
get-value     ;; looks up a value given a key in a dartifact
flatten-all   ;; merge the list of values from a query which includes a artifact into a flat alist <== really useful!
check-artifact

;; artifact alists
write-alist->artifact
read-artifact->alist

;; archive database
;; archive-open-db
;; write-archive-artifacts
;; archive-artifacts
;; mark-processed

;; artifactsdb
artifactdb-conn     ;; useful
artifactdb-fname
artifactsdb-open
artifactsdb-close
artifactsdb-add-record
;; temporary
artifactdb-artifactspec

;; utility procs
increment-string ;; used to get indexes for strings in ref artifacts
make-report      ;; make a .dot file
calc-sha1
uuid-first-two-letters
uuid-remaining-letters

;; file and directory utils
multi-glob
capture-dir
file-get-sha1
check-same
link-or-copy
same-partition?
link-if-same-partition
archive-copy
write-to-archive
artifact-rollup
read-artifacts-into-hash
hash-of-artifacts->bundle
archive-dest

;; pathname-full-filename

;; minimal artifact functions
minimal-artifact-read
minimal-artifact->alist
afact-get-D
afact-get-Z
afact-get-T
afact-get
afact-get-number/default


;; bundles
write-bundle
read-bundle

;; new artifacts db
with-todays-adb
get-all-artifacts
refresh-artifacts-db

)

(import (chicken base) scheme (chicken process) (chicken time posix)
	(chicken io) (chicken file) (chicken pathname)
        chicken.process-context.posix (chicken string)
	(chicken time) (chicken sort) (chicken file posix) (chicken condition) srfi-1
	regex srfi-13 srfi-69 (chicken port) (chicken process-context)
	crypt sha1 matchable message-digest sqlite3 typed-records
	directory-utils
	scsh-process)

;;======================================================================
;; DATA MANIPULATION UTILS
;;======================================================================

(define-inline (unescape-data data)
  (string-translate* data '(("\\n" . "\n") ("\\\\" . "\\"))))

(define-inline (escape-data data)
  (string-translate* data '(("\n" . "\\n") ("\\" . "\\\\"))))

(define-inline (make-card type data)
  (conc type " " (escape-data (->string data))))

;; reverse an alist for doing artifactkey -> external key conversions
;;
(define-inline (reverse-aspec aspec)
  (map (lambda (dat)
	 (cons (cdr dat)(car dat)))
       aspec))

;; add a card to the list of cards, sdat
;; if type is #f return only sdat
;; if data is #f return only sdat
;;
(define-inline (add-card sdat type data)
  (if (and type data)
      (cons (make-card type data) sdat)
      sdat))

;;======================================================================
;; STRING AS FUNKY NUMBER
;;======================================================================

;; NOTE: PTDZ are removed as they are reserved. NB// the R card is not used in a
;;       ref, instead the P parent card is used.
;;       Question: Why does it matter to remove PTDZ?
;;                 To make the ref easier to use the ref strings will be the keys
;;                 so we cannot have overlap with any actual keys. But this is a
;;                 bit silly. What we need to do instead is reject keys of length
;;                 one where the char is in PTDZ
;;
;; This is basically base92
;;
(define string-num-chars (string->list "!#$%&'()*+,-./0123456789:;<=>?@ABCEFGHIJKLMNOQRSUVWXY[\\]^_abcdefghijklmnopqrstuvwxyz{|}~"))
;; "0123456789abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOQSUVWXY!#$%&'()*+,-./[]:;<=>?\\^_{}|"))

(define (char-incr inchar)
  (let* ((carry     #f)
	 (next-char (let ((rem (member inchar string-num-chars)))
		      (if (eq? (length rem) 1) ;; we are at the last character in our string-num-chars list
			  (begin
			    (set! carry #t)
			    (car string-num-chars))
			  (cadr rem)))))
    (values next-char carry)))
    
(define (increment-string str)
  (if (string-null? str)
      "0"
      (let ((strlst (reverse (string->list str)))) ;; need to process the string from the lsd
	(list->string
	 (let loop ((hed (car strlst))
		    (tal (cdr strlst))
		    (res '()))
	   (let-values (((newhed carry)(char-incr hed)))
	     ;; (print "newhed: " newhed " carry: " carry " tal: " tal)
	     (let ((newres (cons newhed res)))
	       (if carry ;; we'll have to propagate the carry
		   (if (null? tal) ;; at the end, tack on "0" (which is really a "1")
		       (cons (car string-num-chars) newres)
		       (loop (car tal)(cdr tal) newres))
		   (append (reverse tal) newres)))))))))
    
;;======================================================================
;; P K T S D B   I N T E R F A C E
;;
;; INTEGER, REAL, TEXT
;;======================================================================
;;
;; spec
;;  ( (tablename1 . (field1name L1 TYPE)
;;                  (field2name L2 TYPE) ... )
;;    (tablename2 ... ))
;;
;;  Example: (tests (testname n TEXT)
;;                  (rundir   r TEXT)
;;                   ... )
;;
;; artifact keys are taken from the first letter, if that is not unique
;; then look at the next letter and so on
;;

;; simplify frequent need to get one result with default
;;
(define (get-one db default qry . params)
  (apply fold-row
   car
   default
   db
   qry
   params))

(define (get-rows db qry . params)
  (apply fold-row
   cons
   db
   qry
   params))
  
;; use this struct to hold the artifactspec and the db handle
;;
(defstruct artifactdb
  (fname       #f)
  (artifactsdb-spec #f)
  (artifactspec     #f)  ;; cache the artifactspec
  (field-keys  #f)  ;; cache the field->key mapping (field1 . k1) ...
  (key-fields  #f)  ;; cache the key->field mapping
  (conn        #f)
  )

;; WARNING: There is a simplification in the artifactsdb spec w.r.t. artifactspec.
;;          The field specs are the cdr of the table list - not a full
;;          list. The extra list level in artifactspec is gratuitous and should
;;          be removed.
;;
(define (artifactsdb-spec->artifactspec tables-spec)
  (map (lambda (tablespec)
	 (list (car tablespec)
	       (map (lambda (field-spec)
		      (cons (car field-spec)(cadr field-spec)))
		    (cdr tablespec))))
       tables-spec))

(define (artifactsdb-open dbfname artifactsdb-spec)
  (let* ((pdb      (make-artifactdb))
	 (dbexists (file-exists? dbfname))
	 (db       (open-database dbfname)))
    (artifactdb-artifactsdb-spec-set! pdb artifactsdb-spec)
    (artifactdb-artifactspec-set!     pdb (artifactsdb-spec->artifactspec artifactsdb-spec))
    (artifactdb-fname-set!       pdb dbfname)
    (artifactdb-conn-set!        pdb db)
    (if (not dbexists)
	(artifactsdb-init pdb))
    pdb))

(define (artifactsdb-init artifactsdb)
  (let* ((db          (artifactdb-conn artifactsdb))
	 (artifactsdb-spec (artifactdb-artifactsdb-spec artifactsdb)))
    ;; create a table for the artifacts themselves
    (execute db "CREATE TABLE IF NOT EXISTS artifacts (id INTEGER PRIMARY KEY, zkey TEXT, record_id INTEGER, artifact TEXT);")
    (for-each
     (lambda (table)
       (let* ((table-name (car table))
	      (fields     (cdr table))
	      (stmt (conc "CREATE TABLE IF NOT EXISTS "
			  table-name
			  " (id INTEGER PRIMARY KEY,"
			  (string-intersperse
			   (map (lambda (fieldspec)
				  (conc (car fieldspec) " "
					(caddr fieldspec)))
				fields)
			   ",")
			  ");")))
	 (execute db stmt)))
     artifactsdb-spec)))

;; create artifact from the data and insert into artifacts table
;; 
;; data is assoc list of (field . value) ...
;; tablename is a symbol matching the table name
;;
(define (artifactsdb-add-record artifactsdb tablename data #!optional (parent #f))
  (let*-values (((zkey artifact) (alist->artifact data (artifactdb-artifactspec artifactsdb) ptype: tablename)))
    ;; have the data as alist so insert it into appropriate table also
    (let* ((db        (artifactdb-conn artifactsdb)))
      ;; TODO: Address collisions
      (execute db "INSERT INTO artifacts (zkey,artifact,record_id) VALUES (?,?,?);"
		zkey artifact -1)
      (let* (;; (artifactid     (artifactsdb-artifactkey->artifactid artifactsdb artifactkey))
	     (record-id (artifactsdb-insert artifactsdb tablename data)))
	(execute db "UPDATE artifacts SET record_id=? WHERE zkey=?;"
		  record-id zkey)
      ))))

;; 
(define (artifactsdb-insert artifactsdb tablename data)
  (let* ((db (artifactdb-conn artifactsdb))
	 (stmt (conc "INSERT INTO " tablename
		     " (" (string-intersperse (map conc (map car data)) ",")
		     ") VALUES ('"
		     ;; TODO: Add lookup of data type and do not
		     ;;       wrap integers with quotes
		     (string-intersperse (map conc (map cdr data)) "','")
		     "');")))
    (print "stmt: " stmt)
    (execute db stmt)
    ;; lookup the record-id and return it
    
    ))
    
(define (artifactsdb-close artifactsdb)
  (finalize! (artifactdb-conn artifactsdb)))

;; (let loop ((s "0")(n 0))(print s)(if (< n 5000)(loop (increment-string s)(+ n 1))))

;;======================================================================
;; CARDS, MISC and UTIL
;;======================================================================

;; given string (likely multi-line) "dat" return shar1 hash
;;
(define (calc-sha1 instr)
  (message-digest-string
   (sha1-primitive)
   instr))

;; given a single card return its type and value
;;
(define (card->type/value card)
  (let ((ctype (substring card 0 1))
	(cval  (substring card 2 (string-length card))))
    (values (string->symbol ctype) cval)))

;;======================================================================
;; SDAT procs
;;  sdat is legacy/internal usage. Intention is to remove sdat calls from
;;  the exposed calls.
;;======================================================================

;; sort list of cards
;;
(define-inline (sort-cards sdat)
  (sort sdat string<=?))

;; artifact rules
;; 1. one card per line
;; 2. at least one card
;; 3. no blank lines

;; given sdat, a list of cards return uuid, packet (as sdat)
;;
(define (add-z-card sdat)
  (let* ((sorted-sdat (sort-cards sdat))
	 (dat         (string-intersperse sorted-sdat "\n"))
	 (uuid        (calc-sha1 dat)))
    (values
     uuid
     (conc
      dat
      "\nZ "
      uuid))))

(define (check-artifact artifact)
  (handle-exceptions
      exn
      #f ;; anything goes wrong - call it a crappy artifact
    (let* ((sdat (string-split artifact "\n"))
	   (rdat (reverse sdat)) ;; reversed
	   (zdat (car rdat))
	   (Z    (cadr (string-split zdat)))
	   (cdat (string-intersperse (reverse (cdr rdat)) "\n")))
      (equal? Z (calc-sha1 cdat)))))

;;======================================================================
;; AARTIFACTs
;;======================================================================

;; convert a sdat (list of cards) to an alist
;;
(define (sdat->alist sdat)
  (let loop ((hed (car sdat))
	     (tal (cdr sdat))
	     (res '()))
    (let-values (( (ctype cval)(card->type/value hed) ))
      ;; if this card is not one of the common ones tack it on to rem
      (let* ((oldval (alist-ref ctype res))
	     (newres (cons (cons ctype
				 (if oldval ;; list or string
				     (if (list? oldval)
					 (cons cval oldval)
					 (cons cval (list oldval)))
				     cval))
			   res)))
	(if (null? tal)
	    newres
	    (loop (car tal)(cdr tal) newres))))))

;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b")     <= this is a the alist
;;       (t . "v1.63/tip/dev")
;;       (c . "QUICKPATT")
;;       (T . "runstart")
;;       (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd")
;;       (D . "1488995096.0"))
;;  (id . 8)
;;  (group-id . 0)
;;  (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b")
;;  (parent . "")
;;  (artifact-type . "runstart")
;;  (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b"))
;;
;; artifactspec is alist of alists mapping types and nicekeys to keys
;;
;; '((posting . ((title . t)
;;               (url   . u)
;;               (blurb . b)))
;;   (comment . ((comment . c)
;;               (score   . s))))

;; DON'T USE? 
;;
(define (get-value field dartifact . spec-in)
  (if (null? spec-in)
      (alist-ref field dartifact)
      (let* ((spec  (car spec-in))
	     (aartifact  (alist-ref 'aartifact dartifact))) ;; get the artifact alist
	(if (and aartifact spec)
	    (let* ((ptype (alist-ref 'artifact-type dartifact))
		   (pspec (alist-ref (string->symbol ptype) spec))) ;; do we have a spec for this type of artifact
	      (and pspec
		  (let* ((key (alist-ref field pspec)))
		    (and key (alist-ref key aartifact)))))
	    #f))))

;; convert a dartifact to a pure alist given a artifactspec
;; this flattens out the alist to include the data from
;; the queue database record
;;
(define (dartifact->alist dartifact artifactspec)
  (let* ((aartifact       (alist-ref 'aartifact dartifact))
	 (artifact-type   (or (alist-ref 'artifact-type dartifact) ;; artifact-type is from the database field artifact_type
			 (alist-ref 'T aartifact)))
	 (artifact-fields (alist-ref (string->symbol artifact-type) artifactspec))
	 (rev-fields (if artifact-fields
			 (reverse-aspec artifact-fields)
			 '())))
    (append (map (lambda (entry)
		   (let* ((artifact-key (car entry))
			  (new-key (or (alist-ref artifact-key rev-fields) artifact-key)))
		     `(,new-key . ,(cdr entry))))
		 aartifact)
	    dartifact)))

;; convert a list of dartifacts into a list of alists using artifact-spec
;;
(define (dartifacts->alists dartifacts artifact-spec)
   (map (lambda (x)
	  (dartifact->alist x artifact-spec))
	dartifacts))

;; Generic flattener, make the tuple and artifact into a single flat alist
;;
;; qry-result-spec is a list of symbols corresponding to each field
;;
(define (flatten-all inlst artifactspec . qry-result-spec)
  (map
   (lambda (tuple)
     (dartifact->alist
      (apply dblst->dartifacts tuple qry-result-spec)
      artifactspec))
   inlst))

;; call like this:
;;  (construct-sdat 'a "a data" 'S "S data" ...)
;; returns list of cards
;;  ( "A a value" "D 12345678900" ...)
;;
(define (construct-sdat . alldat)
  (let ((have-D-card #f)) ;; flag
    (if (even? (length alldat))
	(let loop ((type (car alldat))
		   (data (cadr alldat))
		   (tail (cddr alldat))
		   (res  '()))
	  (if (eq? type 'D)(set! have-D-card #t))
	  (if (null? tail)
	      (if have-D-card ;; return the constructed artifact, add a D card if none found
		  (add-card res type data)
		  (add-card 
		   (add-card res 'D (current-seconds))
		   type data))
	      (loop (car tail)
		    (cadr tail)
		    (cddr tail)
		    (add-card res type data))))
	#f))) ;; #f means it failed to create the sdat

(define (construct-artifact . alldat)
  (add-z-card
   (apply construct-sdat alldat)))

;;======================================================================
;; CONVERTERS
;;======================================================================

(define (artifact->sdat artifact)
  (map unescape-data (string-split artifact "\n")))

;; given a pure artifact return an alist
;;
(define (artifact->alist artifact #!key (artifactspec #f))
  (let ((sdat (cond
	       ((string? artifact)  (artifact->sdat artifact))
	       ((list? artifact)    artifact)
	       (else #f))))
    (if artifact
	(if artifactspec
	    (dartifact->alist (list (cons 'aartifact (sdat->alist sdat))) artifactspec)
	    (sdat->alist sdat))
	#f)))

;; convert an alist to an sdat
;;  in: '((a . "blah")(b . "foo"))
;; out: '("a blah" "b foo")
;;
(define (alist->sdat adat)
  (map (lambda (dat)
	 (conc (car dat) " " (cdr dat)))
       adat))

;; adat is the incoming alist, aspec is the mapping
;; from incoming key to the artifact key (usually one
;; letter to keep data tight) see the artifactspec at the
;; top of this file
;;
;; NOTE: alists can contain multiple instances of the same key (supported fine by artifacts)
;;       but you (obviously I suppose) cannot use alist-ref to access those entries.
;;
(define (alist->artifact adat aspec #!key (ptype #f)(no-d #f))
  (let* ((artifact-type (or ptype
			    (alist-ref 'T adat) ;; can provide in the incoming alist
			    #f))
	 (artifact-spec (if artifact-type            ;; alist of external-key -> key
			    (or (alist-ref artifact-type aspec) '())
			    (if (null? aspec)
				'()
				(cdar aspec)))) ;; default to first one if nothing specified
	 (new-alist (map (lambda (dat)
			   (let* ((key    (car dat))
				  (val    (cdr dat))
				  (newkey (or (alist-ref key artifact-spec)
					      key)))
			     (cons newkey (escape-data (conc val))))) ;; convert all incoming data (symbols, numbers etc.) to a string and then escape newlines.
			 adat))
	 (new-with-type (if (alist-ref 'T new-alist)
			    new-alist
			    (cons `(T . ,artifact-type) new-alist)))
	 (with-d-card   (if (or no-d ;; no timestamp wanted
				(alist-ref 'D new-with-type))
			    new-with-type
			    (cons `(D . ,(current-seconds))
				  new-with-type))))
    (add-z-card
     (alist->sdat with-d-card))))

;;======================================================================
;;  D B   Q U E U E   I N T E R F A C E
;;======================================================================

;; artifacts (
;;   id SERIAL PRIMARY KEY,
;;   uuid TEXT NOT NULL,
;;   parent_uuid TEXT default '',
;;   artifact_type INTEGER DEFAULT 0,
;;   group_id INTEGER NOT NULL,
;;   artifact TEXT NOT NULL

;; schema is list of SQL statements - can be used to extend db with more tables
;;
(define (open-queue-db dbpath dbfile #!key (schema '()))
  (let* ((dbfname  (conc dbpath "/" dbfile))
	 (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f)))
	 (db       (open-database dbfname)))
    ;; (set-busy-handler! (dbi:db-conn db) (busy-timeout 10000))
    (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. 
	(for-each
	 (lambda (stmt)
	   (execute db stmt))
	 (cons "CREATE TABLE IF NOT EXISTS artifacts
                          (id           INTEGER PRIMARY KEY,
                           group_id     INTEGER NOT NULL,
                           uuid         TEXT NOT NULL,
                           parent_uuid  TEXT TEXT DEFAULT '',
                           artifact_type     TEXT NOT NULL,
                           artifact          TEXT NOT NULL,
                           processed    INTEGER DEFAULT 0)"
		   schema))) ;; 0=not processed, 1=processed, 2... for expansion
    db))

(define (add-to-queue db artifact uuid artifact-type parent-uuid group-id)
  (execute db "INSERT INTO artifacts (uuid,parent_uuid,artifact_type,artifact,group_id)
                   VALUES(?,?,?,?,?);" ;; $1,$2,$3,$4,$5);"
	    uuid
	    (if parent-uuid parent-uuid "");; use null string as placeholder for no parent uuid.
	    (if artifact-type (conc artifact-type) "") 
	    artifact
	    group-id))

;; given all needed parameters create a artifact and store it in the queue
;;  procs is an alist that maps artifact-type to a function that takes a list of artifact params
;;  in data and returns the uuid and artifact
;;
(define (create-and-queue conn procs artifact-type parent-uuid group-id data)
  (let ((proc (alist-ref artifact-type procs)))
    (if proc
	(let-values (( (uuid artifact) (proc data) ))
	  (add-to-queue conn artifact uuid artifact-type parent-uuid group-id)
	  uuid)
	#f)))

;; given uuid get artifact, if group-id is specified use it (reduces probablity of
;;     being messed up by a uuid collision)
;;
(define (lookup-by-uuid db artifact-uuid group-id)
  (if group-id
      (get-one db "SELECT artifact FROM artifacts WHERE group_id=? AND uuid=?;" group-id artifact-uuid)
      (get-one db "SELECT artifact FROM artifacts WHERE uuid=?;" artifact-uuid)))
      
;; find a packet by its id
;;
(define (lookup-by-id db id)
  (get-one db "SELECT artifact FROM artifacts WHERE id=?;" id))


;;======================================================================
;;  P R O C E S S   P K T S
;;======================================================================

;; given a list of field values pulled from the queue db generate a list
;; of dartifact's
;;
(define (dblst->dartifacts lst . altmap)
  (let* ((maplst (if (null? altmap)
		     '(id group-id uuid parent artifact-type artifact processed)
		     altmap))
	 (res (map cons maplst lst))) ;; produces list of pairs, i.e an alist
    (cons `(aartifact . ,(artifact->alist (alist-ref 'artifact res)))
	  res)))

;; NB// ptypes is a list of symbols, '() or #f find all types
;;
(define (get-dartifacts db ptypes group-id parent-uuid #!key (uuid #f))
  (let* ((ptype-qry (if (and ptypes
			     (not (null? ptypes)))
			(conc " IN ('" (string-intersperse (map conc ptypes) "','") "')")
			(conc " LIKE '%' ")))
	 (rows      (get-rows
		     db
		     (conc
		      "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
                         WHERE artifact_type " ptype-qry " AND group_id=?
                         AND processed=0 "
			 (if parent-uuid (conc "AND parent_uuid='" parent-uuid "' ") "")
			 (if uuid        (conc "AND        uuid='"        uuid "' ") "")
			 "ORDER BY id DESC;")
		     group-id)))
    (map dblst->dartifacts (map vector->list rows))))

;; get N artifacts not yet processed for group-id
;;
(define (get-not-processed-artifacts db group-id artifact-type limit offset)
  (map dblst->dartifacts
       (map vector->list
	    (get-rows
	     db
	     "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
                WHERE artifact_type = ? AND group_id = ? AND processed=0
                LIMIT ? OFFSET ?;"
	     (conc artifact-type) ;; convert symbols to string
	     group-id
	     limit
	     offset
	     ))))

;; given a uuid, get not processed child artifacts 
;;
(define (get-related db group-id uuid)
  (map dblst->dartifacts
       (get-rows
	db
	"SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
           WHERE parent_uuid=? AND group_id=? AND processed=0;"
	uuid group-id)))

;; generic artifact processor
;;
;; find all packets in group-id of type in ptypes and apply proc to artifactdat
;;
(define (process-artifacts conn group-id ptypes parent-uuid proc)
  (let* ((artifacts (get-dartifacts conn ptypes group-id parent-uuid)))
    (map proc artifacts)))

;; criteria is an alist ((k . valpatt) ...)
;;   - valpatt is a regex
;;   - ptypes is a list of types (symbols expected)
;;   match-type: 'any or 'all
;;
(define (find-artifacts db ptypes criteria #!key (processed #f)(match-type 'any)(artifact-spec #f)) ;; processed=#f, don't use, else use
  (let* ((artifacts (get-dartifacts db ptypes 0 #f))
	 (match-rules (lambda (artifactdat) ;; returns a list of matching rules
			(filter (lambda (c)
				  ;; (print "c: " c)
				  (let* ((ctype (car c)) ;; card type
					 (rx    (cdr c)) ;; card pattern
					 ;; (t     (alist-ref 'artifact-type artifactdat))
					 (artifact   (alist-ref 'artifact artifactdat))
					 (aartifact  (artifact->alist artifact))
					 (cdat  (alist-ref ctype aartifact)))
				    ;; (print "cdat: " cdat) ;; " aartifact: " aartifact)
				    (if cdat
					(string-match rx cdat)
					#f)))
				criteria)))
	 (res         (filter (lambda (artifactdat)
				(if (null? criteria) ;; looking for all artifacts
				    #t
				    (case match-type
				      ((any)(not (null? (match-rules artifactdat))))
				      ((all)(eq? (length (match-rules artifactdat))(length criteria)))
				      (else
				       (print "ERROR: bad match type " match-type ", expecting any or all.")))))
			      artifacts)))
    (if artifact-spec
	(dartifacts->alists res artifact-spec)
	res)))

;; get descendents of parent-uuid
;;
;; NOTE: Should be doing something like the following:
;;
;; given a uuid, get not processed child artifacts 
;; processed:
;;    #f => get all
;;     0 => get not processed
;;     1 => get processed
;;
(define (get-ancestors db group-id uuid #!key (processed #f))
  (map dblst->dartifacts
       (map vector->list
	    (get-rows
	     db
	     (conc
	      "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed 
                FROM artifacts
                 WHERE uuid IN 
                     (WITH RECURSIVE
                       tree(uuid,parent_uuid)
                        AS
                        (
                           SELECT uuid, parent_uuid
                           FROM artifacts
                           WHERE uuid = ?
                           UNION ALL
                           SELECT t.uuid, t.parent_uuid
                           FROM artifacts t
                           JOIN tree ON t.uuid = tree.parent_uuid
                        )
	              SELECT uuid FROM tree)
	    AND group_id=?" (if processed (conc " AND processed=" processed) "") ";")
	     uuid group-id))))

;; Untested
;;
(define (get-descendents db group-id uuid #!key (processed #f))
  (map dblst->dartifacts
       (map vector->list
	    (get-rows
	     db
	     (conc
	      "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed 
                FROM artifacts
                 WHERE uuid IN 
                     (WITH RECURSIVE
                       tree(uuid,parent_uuid)
                        AS
                        (
                           SELECT uuid, parent_uuid
                           FROM artifacts
                           WHERE uuid = ?
                           UNION ALL
                           SELECT t.uuid, t.parent_uuid
                           FROM artifacts t
                           JOIN tree ON t.parent_uuid = tree.uuid
                        )
	              SELECT uuid FROM tree)
	    AND group_id=?" (if processed (conc " AND processed=" processed) "") ";")
	     uuid group-id))))

;; look up descendents based on given info unless passed in a list via inlst
;;
;; (define (get-last-descendent db group-id uuid #!key (processed #f)(inlst #f))
;;   (let ((descendents (or inlst (get-descendents db group-id uuid processed: processed))))
;;     (if (null? descendents)
;; 	#f
;; 	(last descendents))))

;;======================================================================
;;  A R C H I V E S - always to a sqlite3 db 
;;======================================================================

;; open an archive db
;; path: archive-dir/<year>/month.db
;;
#;(define (archive-open-db archive-dir)
  (let* ((curr-time (seconds->local-time (current-seconds)))
	 (dbpath    (conc archive-dir "/" (time->string curr-time "%Y")))
	 (dbfile    (conc dbpath "/" (time->string curr-time "%m") ".db"))
	 (dbexists (if (file-exists? dbfile) #t (begin (create-directory dbpath #t) #f))))
    (let ((db (open-database dbfile)))
      ;; (set-busy-handler! db (busy-timeout 10000))
      (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. 
	  (execute db "CREATE TABLE IF NOT EXISTS artifacts
                          (id           INTEGER,
                           group_id     INTEGER,
                           uuid         TEXT,
                           parent_uuid  TEXT,
                           artifact_type     TEXT,
                           artifact          TEXT,
                           processed    INTEGER DEFAULT 0)"))
      db)))

;; turn on transactions! otherwise this will be painfully slow
;;
#;(define (write-archive-artifacts src-db db artifact-ids)
  (let ((artifacts (get-rows
	       src-db
	       (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact FROM artifacts WHERE id IN ("
		     (string-intersperse (map conc artifact-ids) ",") ")"))))
    ;; (dbi:with-transaction
    ;;  db
     (lambda ()
       (for-each
	(lambda (artifact)
	  (apply execute  db "INSERT INTO artifacts (id,group_id,uuid,parent_uuid,artifact_type,artifact)
                               VALUES (?,?,?,?,?,?)"
		 artifact))
	artifacts)))) ;; )

;; given a list of uuids and lists of uuids move all to
;; the sqlite3 db for the current archive period
;;
#;(define (archive-artifacts conn artifact-ids archive-dir)
  (let ((db (archive-open-db archive-dir)))
    (write-archive-artifacts conn db artifact-ids)
    (finalize! db))
  ;; (pg:with-transaction
  ;;  conn
  ;; (lambda ()
     (for-each
      (lambda (id)
	(get-one
	 conn
	 "DELETE FROM artifacts WHERE id=?" id))
      artifact-ids)) ;; ))

;; given a list of ids mark all as processed
;;
(define (mark-processed conn artifact-ids)
  ;; (pg:with-transaction
   ;; conn
   ;; (lambda ()
     (for-each
      (lambda (id)
	(get-one
	 conn
	 "UPDATE artifacts SET processed=1 WHERE id=?;" id))
      artifact-ids)) ;; x))

;; a generic artifact getter, gets from the artifacts db
;;
(define (get-artifacts conn ptypes)
  (let* ((ptypes-str    (if (null? ptypes)
			    ""
			    (conc " WHERE artifact_type IN ('" (string-intersperse ptypes ",") "') ")))
	 (qry-str       (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts" ptypes-str)))
    (map vector->list (get-rows conn qry-str))))

;; make a report of the artifacts in the db
;; ptypes of '() gets all artifacts
;; display-fields
;;
(define (make-report dest conn artifactspec display-fields . ptypes)
  (let* (;; (conn          (dbi:db-conn (s:db)))
	 (all-rows      (get-artifacts conn ptypes))
	 (all-artifacts      (flatten-all
			 all-rows
			 artifactspec
			 'id 'group-id 'uuid 'parent 'artifact-type 'artifact 'processed))
	 (by-uuid       (let ((ht (make-hash-table)))
			  (for-each
			   (lambda (artifact)
			     (let ((uuid (alist-ref 'uuid artifact)))
			       (hash-table-set! ht uuid artifact)))
			   all-artifacts)
			  ht))
	 (by-parent     (let ((ht (make-hash-table)))
			  (for-each
			   (lambda (artifact)
			     (let ((parent (alist-ref 'parent artifact)))
			       (hash-table-set! ht parent (cons artifact (hash-table-ref/default ht parent '())))))
			   all-artifacts)
			    ht))
	 (oup           (if dest (open-output-file dest) (current-output-port))))
    
    (with-output-to-port
	oup
      (lambda ()
	(print "digraph megatest_state_status {
  // ranksep=0.05
  rankdir=LR;
  node [shape=\"box\"];
")
	;; first all the names
	(for-each
	 (lambda (artifact)
	   (let* ((uuid        (alist-ref 'uuid artifact))
		  (shortuuid   (substring uuid 0 4))
		  (type        (alist-ref 'artifact-type artifact))
		  (processed   (alist-ref 'processed artifact)))
	     
	     (print "\"" uuid "\" [label=\"" shortuuid ", ("
		    type ", "
		    (if processed "processed" "not processed") ")")
	     (for-each
	      (lambda (key-field)
		(let ((val (alist-ref key-field artifact)))
		  (if val
		      (print key-field "=" val))))
	      display-fields)
	     (print "\" ];")))
	 all-artifacts)
	;; now for parent-child relationships
	(for-each
	 (lambda (artifact)
	   (let ((uuid   (alist-ref 'uuid artifact))
		 (parent (alist-ref 'parent artifact)))
	     (if (not (equal? parent ""))
		 (print "\"" parent "\" -> \"" uuid"\";"))))
	 all-artifacts)

	(print "}")
	))
    (if dest
	(begin
	  (close-output-port oup)
	  (system "dot -Tpdf out.dot -o out.pdf")))
    
    ))

;;======================================================================
;; Read ref artifacts into a vector < laststr hash table > 
;;======================================================================



;;======================================================================
;; Read/write packets to files (convience functions)
;;======================================================================

;; write alist to a artifact file
;;
(define (write-alist->artifact targdir dat #!key (artifactspec '())(ptype #f))
  (let-values (((uuid artifact)(alist->artifact dat artifactspec ptype: ptype)))
    (with-output-to-file (conc targdir "/" uuid ".artifact")
      (lambda ()
	(print artifact)))
    uuid)) ;; return the uuid

;; read artifact into alist
;;
(define (read-artifact->alist artifact-file #!key (artifactspec #f))
  (artifact->alist (with-input-from-file
		  artifact-file
		read-string)
	      artifactspec: artifactspec))

;;======================================================================
;; File utils, stuff useful for file management
;;======================================================================

(define (file-get-sha1 fname)
  (let* ((sha1-res (run/strings (sha1sum ,fname))))
    (car (string-split (car sha1-res)))))

(define (link-or-copy srcf destf)
  (or (handle-exceptions
       exn
       #f
       (file-link srcf destf))
      (if (file-exists? destf)
	  (print "NOTE: destination already exists, skipping copy.")
	  (copy-file srcf destf))))

;; (define (files-diff file1 file2)
;;   (let* ((diff-res (with-input-from-port
;; 		       (run/port (diff "-q" ,file1 ,file2))
;; 		     (lambda ()
;; 		       (let* ((res (read-line)))
;; 			 (read-lines)
;; 			 res)))))
;;     (car (string-split sha1-res))))
;; 


(define (check-same file1 file2)
  (cond
   ((not (and (file-exists? file1)(file-exists? file2))) #f)
   ((not (equal? (file-size file1)(file-size file2))) #f)
   (else
    (let-values (((status run-ok process-id)
		  (run (diff "-q" ,file1 ,file2))))
      status))))

(define *pcache* (make-hash-table))
(define (get-device dir)
  (let ((indat (or (hash-table-ref/default *pcache* dir #f)
		   (let* ((inp (open-input-pipe (conc "df --output=source \""dir"\"")))
			  (res (read-lines inp)))
		     (close-input-port inp)
		     (hash-table-set! *pcache* dir res)
		     res))))
    (cadr indat)))

(define (same-partition? dir1 dir2)
  (equal? (get-device dir1)(get-device dir2)))

(define (link-if-same-partition file1 file2)
  (let* ((dir1 (pathname-directory file1))
	 (dir2 (pathname-directory file2))
	 (f1   (pathname-file file1))
	 (f2   (pathname-file file2)))
    (if (same-partition? dir1 dir2)
	(let* ((tmpname (conc "."f2"-"(current-seconds))))
	  ;; this steps needs to be executed as actual user    
	  (move-file file2 (conc dir1 "/" tmpname))
	  (file-link file1 file2)
	  (delete-file (conc dir1 "/" tmpname))))))

(define (uuid-first-two-letters sha1sum)
  (substring sha1sum 0 2))

(define (uuid-remaining-letters sha1sum)
  (let ((slen (string-length sha1sum)))
    (substring sha1sum 2 slen)))

(define (archive-dest destd sha1sum)
  (let* ((subdir         (uuid-first-two-letters sha1sum)) ;; (substring sha1sum 0 2))
	 ;; (slen           (string-length sha1sum))
	 (rem            sha1sum #;(uuid-remaining-letters sha1sum)) ;; (substring sha1sum 3 slen))
	 (full-dest-dir  (conc destd"/"subdir))
	 (full-dest-file (conc full-dest-dir"/"rem)))
    (if (not (directory-exists? full-dest-dir))
	(create-directory full-dest-dir #t))
    full-dest-file))

(define (write-to-archive data destd #!optional (nextnum #f))
  (let* ((sha1sum    (calc-sha1 data))
	 (full-dest  (conc (archive-dest destd sha1sum)
			   (if nextnum (conc "."nextnum) ""))))
    (if (file-exists? full-dest)
	(if (equal? (string-intersperse (with-input-from-file full-dest read-lines) "\n")
		    data)
	    (begin
	      ;; (print "INFO: data already exists in "full-dest" and is identical")
	      sha1sum)
	    (let ((nextnum (if nextnum (+ nextnum 1) 0)))
	      (print "WARN: data already exists in "full-dest" but is different! Trying again...")
	      (write-to-archive data destd nextnum)))
	(begin
	  (with-output-to-file
	      full-dest
	    (lambda ()
	      (print data)))
	  sha1sum)))) ;; BUG? Does print munge data?

;; copy srcf with sha1sum aabc... to aa/bc...
;;
(define (archive-copy srcf destd sha1sum)
  (let* ((full-dest-file (archive-dest destd sha1sum)))
    (let loop ((trynum 0))
      (let ((dest-name (if (> trynum 0)
			   (conc full-dest-file"-"trynum)
			   full-dest-file)))
	(cond
	 ((not (file-exists? srcf)) #f) ;; this should be an error?
	 ((and (file-exists? srcf)
	       (file-exists? dest-name))
	  (if (check-same srcf dest-name)
	      (link-if-same-partition dest-name srcf)
	      (loop (+ trynum 1)))) ;; collisions are rare, this protects against them
	 ((not (file-exists? dest-name))
	  (link-or-copy srcf dest-name))
	 (else #f))))))

;; multi-glob
(define (multi-glob globstrs inpath)
  ;; (print "multi-glob: "globstrs", "inpath)
  (if (equal? inpath "")
      globstrs
      (let* ((parts     (string-split inpath "/" #t))
	     (nextpart  (car parts))
	     (remaining (string-intersperse (cdr parts) "/")))
	(if (and (equal? nextpart "") ;; this must be a leading / meaning root directory
	         (null? globstrs))
	    (multi-glob '("/") remaining)
	    (begin
	      ;; (print "nextpart="nextpart", remaining="remaining)
	      (apply append
              (map (lambda (gstr)
                      (let* ((pathstr  (conc gstr"/"nextpart))
		   	     (pathstrs (glob pathstr)))
		          ;; (print "pathstr="pathstr)
		          (multi-glob pathstrs remaining)))
	               globstrs)))))))
	     

;; perm[/user:group]:
;;   DDD - octal perm (future expansion)
;;   -   - use umask/defacto perms (i.e. don't actively do anything)
;;   x   - mark as executable
;;
;; Cards:
;;   file:      f perm fname
;;   directory: d perm fname artifactid
;;   link:      l perm lname destpath
;;
;;   NOTE: cards are kept as (C . "value")
;;
;; given a directory path, ignore list and artifact store (hash-table):
;;  1. create sha1 tree at dest (e.g. aa/b3a7 ...)
;;  2. create artifact for each dir
;;    - cards for all files
;;    - cards for files that are symlinks or executables
;;  3. return (artifactid . artifact)
;;
;; NOTES:
;;   Use destdir of #f to not create sha1 tree
;;   Hard links will be used if srcdir and destdir appear to be same partion
;;
;; (alist->artifact adat aspec #!key (ptype #f))
;;
;;
;; (load "../../artifacts/artifacts.scm")(import big-chicken srfi-69 artifacts)(define dirdat (make-hash-table))
;; (capture-dir ".." ".." "/tmp/junk" '() dirdat)
;;
;; [procedure] (file-type FILE [LINK [ERROR]])
;; Returns the file-type for FILE, which should be a filename, a file-descriptor or a port object. If LINK is given and true, symbolic-links are not followed:
;; 
;;  regular-file
;;  directory
;;  fifo
;;  socket
;;  symbolic-link
;;  character-device
;;  block-device
;; Note that not all types are supported on every platform. If ERROR is given and false, then file-type returns #f if the file does not exist; otherwise, it signals an error.
;; 
;; 
(define (capture-dir curr-dir src-dir dest-dir ignore-list artifacts all-seen)
  (let* ((dir-dat (directory-fold
		   (lambda (fname res) ;; res is a list of artifact cards
		     (let* ((fullname   (conc curr-dir"/"fname)))
		       ;; (print "INFO: processing "fullname)
		       (if (hash-table-ref/default all-seen fullname #f) ;; something circular going on
			   (begin
			     (print "WARNING: possible circular link(s) "fullname)
			     res)
			   (let* ((ftype (file-type fullname #t #f)))
			     (hash-table-set! all-seen fullname ftype)
			     (cons
			      (case ftype ;; get the card
			       ((directory) ;; (directory? fullname)
				(let* ((new-curr-dir (conc curr-dir"/"fname))
				       (new-src-dir  (conc src-dir"/"fname)))
				  (let* ((dir-dat (capture-dir new-curr-dir new-src-dir
							       dest-dir ignore-list artifacts all-seen))
					 (a-id (car dir-dat))
					 (artf (cdr dir-dat)))
				    (hash-table-set! artifacts a-id artf)
				    (cons 'd (conc "- "a-id" "fname))))) ;; the card
			       ((symbolic-link) ;; (symbolic-link? fullname)
				(let ((ldest (read-symbolic-link fullname)))
				  (cons 'l (conc "- "fname"/"ldest)))) ;; delimit link name from dest with /
			       ((regular-file) ;; must be a file
				(let* ((start      (current-seconds))
				       (sha1sum    (file-get-sha1 fullname))
				       (perms      (if (file-executable? fullname) "x" "-")))
				  (let ((runtime (- (current-seconds) start)))
				    (if (> runtime 1)
					(print "INFO: file "fullname" took "runtime" seconds to calculate sha1.")))
				  (if dest-dir
				      (archive-copy fullname dest-dir sha1sum))
				  (cons 'f (conc perms " "sha1sum" "fname))))
			       (else
				(print "WARNING: file "fullname" of type "ftype" is NOT supported and will converted to empty file.")
				(let* ((sha1sum (write-to-archive "" dest-dir)))
				  (cons 'f (conc "- "sha1sum" "fname)))))
			      res)))))
		       '() src-dir #:dotfiles? #t))) ;; => (values srcdir_artifact sub_artifacts_list)
    ;; (print "dir-dat: " dir-dat)
    (let-values (((a-id artf)
		  (alist->artifact dir-dat '() ptype: 'd no-d: #t)))
      (hash-table-set! artifacts a-id artf)
      (cons a-id artf))))

;; maybe move this into artifacts?
;;
;; currently moves *.artifact into a bundle and moves the artifacts into attic
;; future: move artifacts under 1 meg in size into bundle up to 10 meg in size
;;
(define (artifact-rollup bundle-dir) ;; cfg storepath)
  ;; (let* ((bundle-dir (calc-bundle-dir cfg storepath)))
    (let* ((bundles   (glob (conc bundle-dir"/*.bundle")))
	   (artifacts (glob (conc bundle-dir"/*.artifact"))))
      (if (> (length artifacts) 30) ;; rollup only if > 30 artifacts
	  ;; if we have unbundled artifacts, bundle them
	  (let* ((ht     (read-artifacts-into-hash #f artifacts: artifacts))
		 (bundle (hash-of-artifacts->bundle ht)))
	    (write-bundle bundle bundle-dir)
	    (create-directory (conc bundle-dir"/attic") #t)
	    (for-each
	     (lambda (full-fname)
	       (let* ((fname   (pathname-strip-directory full-fname))
		      (newname (conc bundle-dir"/attic/"fname)))
		 (move-file full-fname newname #t)))
	     artifacts)
	    (conc "bundled "(length artifacts)))
	  "not enough artifacts to bundle")))

;; if destfile is a directory then calculate the sha1sum of the bundle and store it
;; by <sha1sum>.bundle
;;
;; incoming dat is pure text (bundle already sorted and appended:
;;
(define (write-bundle bdl-data destdir)
  (let* ((bdl-uuid  (calc-sha1 bdl-data)))
    (with-output-to-file
	(conc destdir"/"bdl-uuid".bundle")
      (lambda ()
	(print bdl-data)))))

;; minimal (and hopefully fast) artifact reader
;; TODO: Add check of shar sum.
;;
(define (minimal-artifact-read fname)
  (let* ((indat (with-input-from-file fname read-lines)))
    (if (null? indat)
	(values #f (conc "did not find an artifact in "fname))
	(let* ((zcard (last indat))
	       (cardk (substring zcard 0 1))
	       (cardv (substring zcard 2 (string-length zcard))))
	  (if (equal? cardk "Z")
	      (values cardv (string-intersperse indat "\n"))
	      (values #f (conc fname" is not a valid artifact")))))))

;; read artifacts from directory into hash
;; NOTE: support for max-count not implemented yet
;;
(define (read-artifacts-into-hash dir #!key (artifacts #f) (max-count #f)(ht #f))
  (let* ((artifacts (or artifacts
			(glob (conc dir"/*.artifact"))))
	 (ht        (or ht (make-hash-table))))
    (for-each
     (lambda (fname)
       (let-values (((uuid afct)
		     (minimal-artifact-read fname)))
	 (hash-table-set! ht uuid afct)))
     artifacts)
    ht))

;; ht is:
;;   uuid => artifact text
;; use write-bundle to put result into a bundle file
;;
(define (hash-of-artifacts->bundle ht)    
  (fold (lambda (k res)
	  (let* ((v (hash-table-ref ht k)))
	    (if res
		(conc res"\n"v)
		v)))
	#f
	(sort (hash-table-keys ht) string<=?)))

;; minimal artifact to alist
;;
(define (minimal-artifact->alist afact)
  (let* ((lines   (string-split afact "\n")))
    (map (lambda (a)
	   (let* ((key (string->symbol (substring a 0 1)))
		  (sl  (string-length a))
		  (val (if (> sl 2)
			   (substring a 2 sl)
			   "")))
	     (cons key val)))
	 lines)))

;; some accessors for common cards
(define (afact-get-D afact)
  (let ((dval (alist-ref 'D afact)))
    (if dval
	(string->number dval)
	#f)))

(define (afact-get-T afact) ;; get the artifact type as a symbol
  (let ((val (alist-ref 'T afact)))
    (if val
	(string->symbol val)
	val)))

(define (afact-get-Z afact)
  (alist-ref 'Z afact))

(define (afact-get afact key default)
  (or (alist-ref key afact)
      default))

(define (afact-get-number/default afact key default)
  (let ((val (alist-ref key afact)))
    (if val
	(or (string->number val) default) ;; seems wrong
	default)))

;; bundles are never big and reading into memory for processing is fine
;;
(define (read-bundle srcfile #!optional (mode 'uuid-raw))
  (let* ((indat (with-input-from-file srcfile read-lines)))
    (let loop ((tail indat)
	       (dat '())  ;; artifact being extracted
	       (res '())) ;; list of artifacts
      (if (null? tail)
	  (reverse res) ;; last dat should be empty list
	  (let* ((curr-line (car tail)))
	    (let-values (((ctype cdata)
			  (card->type/value curr-line)))
	      (let* ((is-z-card (eq? 'Z ctype))
		     (new-dat   (cons (case mode
					((uuid-raw) curr-line)
					(else       (cons ctype cdata)))
				      dat)))
		(if is-z-card
		    (loop (cdr tail) ;; done with this artifact
			  '()
			  (cons (case mode
				  ((uuid-raw) (cons cdata (string-intersperse (reverse new-dat) "\n")))
				  (else       (reverse new-dat)))
				res))
		    (loop (cdr tail)
			  new-dat
			  res)))))))))


;; find all .bundle and .artifacts files in bundle-dir
;; and inport them into sqlite handle adb
;; 
(define (refresh-artifacts-db adb bundle-dir)
  (let* ((bundles   (glob (conc bundle-dir"/*.bundle")))
	 (artifacts (glob (conc bundle-dir"/*.artifact")))
	 (uuids     (get-all-uuids adb 'hash)))
    (with-transaction
     adb
     (lambda ()
       (for-each
	(lambda (bundle-file)
	  ;; (print "Importing artifacts from "bundle-file)
	  (let* ((bdat (read-bundle bundle-file 'uuid-raw))
		 (count 0)
		 (inc   (lambda ()(set! count (+ count 1)))))
	    (for-each
	     (lambda (adat)
	       (match
		adat
		((zval . artifact)
		 (if (not (hash-table-exists? uuids zval))
		     (begin
		       ;; (print "INFO: importing new artifact "zval" from bundle "bundle-file)
		       (inc)
		       (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);"
				zval artifact)
		       (hash-table-set! uuids zval #t))))
		(else
		 (print "ERROR: Bad artifact data "adat))))
	     bdat)
	    (print "INFO: imported "count" artifacts from "bundle-file)))
	bundles)
       (for-each
	(lambda (artifact-file)
	  ;; (print "Importing artifact from "artifact-file)
	  (let-values (((uuid artifact) (minimal-artifact-read artifact-file)))
	    (if uuid
		(if (not (hash-table-exists? uuids uuid))
		    (begin
		      ;; (print "INFO: importing new artifact "uuid" from "artifact-file)
		      (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);"
			       uuid artifact)
		      (hash-table-set! uuids uuid #t)))
		(print "Bad artifact in "artifact-file))))
	artifacts)))))

;;======================================================================
;;  Artifacts db cache
;;======================================================================

;; artifacts
;;   id SERIAL PRIMARY KEY,
;;   uuid TEXT NOT NULL,
;;   artifact TEXT NOT NULL
;;
;; parents
;;   id INTEGER REFERENCES artids.id,  -- 
;;   parent_id  REFERENCES artids.id
;;
;; schema is list of SQL statements - can be used to extend db with more tables
;;
(define (open-artifacts-db dbpath dbfile #!key (schema '()))
  (let* ((dbfname  (conc dbpath "/" dbfile))
	 (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f)))
	 (adb      (open-database dbfname)))
    (set-busy-handler! adb (make-busy-timeout 10000))
    (execute adb "PRAGMA synchronous = 0;")
    (if (not dbexists)
	(with-transaction
	 adb
	 (lambda ()
	   (for-each
	    (lambda (stmt)
	      (execute adb stmt))
	    (append `("CREATE TABLE IF NOT EXISTS artifacts
                          (id           INTEGER PRIMARY KEY,
                           uuid         TEXT NOT NULL,
                           artifact     TEXT NOT NULL)"
		      
		      "CREATE TABLE IF NOT EXISTS parents
                          (id INTEGER REFERENCES artifacts(id) NOT NULL,
                           parent_id INTEGER REFERENCES artifacts(id) NOT NULL)")
		    schema)))))
    adb))

(define (generate-year-month-name #!optional (seconds #f))
  (let* ((curr-time (seconds->local-time (or seconds (current-seconds)))))
    (time->string curr-time "%Y%m")))

;; I don't like this function. TODO: remove the
;; mode and option to return ht. Use instead the
;; get-all-artifacts below
;;
(define (get-all-uuids adb #!optional (mode #f))
  (let* ((res (fold-row
	       (lambda (res uuid)
		 (cons uuid res))
	       '()
	       adb
	       "SELECT uuid FROM artifacts;")))
    (case mode
      ((hash)
       (let* ((ht (make-hash-table)))
	 (for-each
	  (lambda (uuid)
	    (hash-table-set! ht uuid #t))
	  res)
	 ht))
      (else res))))

;; returns raw artifacts (i.e. NOT alists but instead plain text)
(define (get-all-artifacts adb)
  (let* ((ht  (make-hash-table)))
    (for-each-row
     (lambda (id uuid artifact)
       (hash-table-set! ht uuid `(,id ,uuid ,artifact)))
     adb
     "SELECT id,uuid,artifact FROM artifacts;")
    ht))

;; given a bundle-dir copy or create to /tmp and open
;; the YYMM.db file and hand the handle to the given proc
;; NOTE: we operate in /tmp/ to accomodate users on NFS
;; where slamming Unix locks at an NFS filer can cause
;; locking fails. Eventually this /tmp behavior will be
;; configurable.
;;
(define (with-todays-adb bundle-dir proc)
  (let* ((dbname   (conc (generate-year-month-name) ".db"))
	 (destname (conc bundle-dir"/"dbname))
	 (tmparea  (conc "/tmp/"(current-user-name)"-"(calc-sha1 bundle-dir)))
	 (tmpname  (conc tmparea"/"dbname))
	 (lockfile (conc destname".update-in-progress")))
    ;; (print "with-todays-adb, bundle-dir: "bundle-dir", dbname: "dbname", destname: "destname",\n    tmparea: " tmparea", lockfile: "lockfile)
    (if (not (file-exists? tmparea))(create-directory tmparea #t))
    (let loop ((count 0))
      (if (file-exists? lockfile)
	  (if (< count 30) ;; aproximately 30 seconds
	      (begin
		(sleep 1)
		(loop (+ 1 count)))
	      (print "ERROR: "lockfile" exists, proceeding anyway"))
	  (if (file-exists? destname)
	      (begin
		(copy-file destname tmpname #t)
		(copy-file destname lockfile #t)))))
    (let* ((adb  (open-artifacts-db tmparea dbname))
	   (res  (proc adb)))
      (finalize! adb)
      (copy-file tmpname destname #t)
      (delete-file* lockfile)
      res)))

) ;; module artifacts

;; ATTIC