ADDED artifacts.scm Index: artifacts.scm ================================================================== --- /dev/null +++ artifacts.scm @@ -0,0 +1,24 @@ +;;====================================================================== +;; Copyright 2019, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit artifacts)) + +(include "artifacts/artifacts.scm") + ADDED artifacts/README Index: artifacts/README ================================================================== --- /dev/null +++ artifacts/README @@ -0,0 +1,1 @@ +NOTE: keep megatest/artifacts/ in sync with datastore/artifacts ADDED artifacts/artifacts.meta Index: artifacts/artifacts.meta ================================================================== --- /dev/null +++ artifacts/artifacts.meta @@ -0,0 +1,21 @@ +;; -*- scheme -*- +( +; Your egg's license: +(license "BSD") + +; Pick one from the list of categories (see below) for your egg and enter it +; here. +(category db) + +; A list of eggs pkts depends on. If none, you can omit this declaration +; altogether. If you are making an egg for chicken 3 and you need to use +; procedures from the `files' unit, be sure to include the `files' egg in the +; `needs' section (chicken versions < 3.4.0 don't provide the `files' unit). +; `depends' is an alias to `needs'. +;; (needs (autoload "3.0")) + +; A list of eggs required for TESTING ONLY. See the `Tests' section. +(test-depends test) + +(author "Matt Welland") +(synopsis "A sha1-chain based datastore similar to the data format in fossil scm, consisting of artifacts of single line cards.")) ADDED artifacts/artifacts.release-info Index: artifacts/artifacts.release-info ================================================================== --- /dev/null +++ artifacts/artifacts.release-info @@ -0,0 +1,3 @@ +(repo fossil "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}") +(uri zip "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}/zip/{egg-name}.zip?uuid={egg-release}") +(release "1.0") ADDED artifacts/artifacts.scm Index: artifacts/artifacts.scm ================================================================== --- /dev/null +++ artifacts/artifacts.scm @@ -0,0 +1,1624 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of artifacts +;; +;; Pkts is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Pkts is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Pkts. If not, see . +;; + +;; CARDS: +;; +;; A card is a line of text, the first two characters are a letter followed by a +;; space. The letter is the card type. +;; +;; artifact: +;; +;; An artifact is a sorted list of cards with a final card Z that contains the shar1 hash +;; of all of the preceding cards. +;; +;; AARTIFACT: +;; +;; An alist mapping card types to card data +;; '((T . "artifacttype") +;; (a . "some content")) +;; +;; EARTIFACT: +;; +;; Extended packet using friendly keys. Must use a artifactspec to convert to/from eartifacts +;; '((ptype . "artifacttype") +;; (adata . "some content)) +;; +;; DARTIFACT: +;; +;; artifacts pulled from the database have this format: +;; +;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist +;; (t . "v1.63/tip/dev") +;; (c . "QUICKPATT") +;; (T . "runstart") +;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd") +;; (D . "1488995096.0")) +;; (id . 8) +;; (group-id . 0) +;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b") +;; (parent . "") +;; (artifact-type . "runstart") +;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b")) +;; +;; artifactspec is alist of alists mapping types and nicekeys to keys +;; +;; '((posting . ((title . t) +;; (url . u) +;; (blurb . b))) +;; (comment . ((comment . c) +;; (score . s)))) + +;; Reserved cards: +;; P : artifact parent +;; R : reference artifact containing mapping of short string -> sha1sum strings +;; T : artifact type +;; D : current time from (current-time), unless provided +;; Z : shar1 hash of the packet + +;; Example usage: +;; +;; Create a artifact: +;; +;; (use artifacts) +;; (define-values (uuid artifact) +;; (alist->artifact +;; '((fruit . "apple") (meat . "beef")) ;; this is the data to convert +;; '((foods (fruit . f) (meat . m))) ;; this is the artifact spec +;; ptype: +;; 'foods)) +;; +;; Add to artifact queue: +;; +;; (define db (open-queue-db "/tmp/artifacts" "artifacts.db")) +;; (add-to-queue db artifact uuid 'foods #f 0) ;; no parent and use group_id of 0 +;; +;; Retrieve the packet from the db and extract a value: +;; +;; (alist-ref +;; 'meat +;; (dartifact->alist +;; (car (get-dartifacts db #f 0 #f)) +;; '((foods (fruit . f) +;; (meat . m))))) +;; => "beef" +;; + +(module artifacts +( +;; cards, util and misc +;; sort-cards +;; calc-sha1 +;; +;; low-level constructor procs, exposed only for development/testing, will be removed +construct-sdat +construct-artifact +card->type/value +add-z-card + +;; queue database procs +open-queue-db +add-to-queue +create-and-queue +;; lookup-by-uuid +lookup-by-id +get-dartifacts +get-not-processed-artifacts +get-related +find-artifacts +process-artifacts +get-descendents +get-ancestors +get-artifacts +;; get-last-descendent +;; with-queue-db +;; load-artifacts-to-db + +;; procs that operate directly on artifacts, sdat, aartifacts, dartifacts etc. +artifact->alist ;; artifact -> aartifact (i.e. alist) +artifact->sdat ;; artifact -> '("a aval" "b bval" ...) +sdat->alist ;; '("a aval" "b bval"...) -> ((a . "aval")(b . "bval") ...) +dblst->dartifacts ;; convert list of tuples from queue db into dartifacts +dartifact->alist ;; flatten a dartifact into an alist containing all db fields and the artifact alist +dartifacts->alists ;; apply dartifact->alist to a list of alists using a artifact-spec +alist->artifact ;; returns two values uuid, artifact +get-value ;; looks up a value given a key in a dartifact +flatten-all ;; merge the list of values from a query which includes a artifact into a flat alist <== really useful! +check-artifact + +;; artifact alists +write-alist->artifact +read-artifact->alist + +;; archive database +;; archive-open-db +;; write-archive-artifacts +;; archive-artifacts +;; mark-processed + +;; artifactsdb +artifactdb-conn ;; useful +artifactdb-fname +artifactsdb-open +artifactsdb-close +artifactsdb-add-record +;; temporary +artifactdb-artifactspec + +;; utility procs +increment-string ;; used to get indexes for strings in ref artifacts +make-report ;; make a .dot file +calc-sha1 +uuid-first-two-letters +uuid-remaining-letters + +;; file and directory utils +multi-glob +capture-dir +file-get-sha1 +check-same +link-or-copy +same-partition? +link-if-same-partition +archive-copy +write-to-archive +artifact-rollup +read-artifacts-into-hash +hash-of-artifacts->bundle +archive-dest + +;; pathname-full-filename + +;; minimal artifact functions +minimal-artifact-read +minimal-artifact->alist +afact-get-D +afact-get-Z +afact-get-T +afact-get +afact-get-number/default + + +;; bundles +write-bundle +read-bundle + +;; new artifacts db +with-todays-adb +get-all-artifacts +refresh-artifacts-db + +) + +(import (chicken base) scheme (chicken process) (chicken time posix) + (chicken io) (chicken file) (chicken pathname) + chicken.process-context.posix (chicken string) + (chicken time) (chicken sort) (chicken file posix) (chicken condition) srfi-1 + regex srfi-13 srfi-69 (chicken port) (chicken process-context) + crypt sha1 matchable message-digest sqlite3 typed-records + directory-utils + scsh-process) + +;;====================================================================== +;; DATA MANIPULATION UTILS +;;====================================================================== + +(define-inline (unescape-data data) + (string-translate* data '(("\\n" . "\n") ("\\\\" . "\\")))) + +(define-inline (escape-data data) + (string-translate* data '(("\n" . "\\n") ("\\" . "\\\\")))) + +(define-inline (make-card type data) + (conc type " " (escape-data (->string data)))) + +;; reverse an alist for doing artifactkey -> external key conversions +;; +(define-inline (reverse-aspec aspec) + (map (lambda (dat) + (cons (cdr dat)(car dat))) + aspec)) + +;; add a card to the list of cards, sdat +;; if type is #f return only sdat +;; if data is #f return only sdat +;; +(define-inline (add-card sdat type data) + (if (and type data) + (cons (make-card type data) sdat) + sdat)) + +;;====================================================================== +;; STRING AS FUNKY NUMBER +;;====================================================================== + +;; NOTE: PTDZ are removed as they are reserved. NB// the R card is not used in a +;; ref, instead the P parent card is used. +;; Question: Why does it matter to remove PTDZ? +;; To make the ref easier to use the ref strings will be the keys +;; so we cannot have overlap with any actual keys. But this is a +;; bit silly. What we need to do instead is reject keys of length +;; one where the char is in PTDZ +;; +;; This is basically base92 +;; +(define string-num-chars (string->list "!#$%&'()*+,-./0123456789:;<=>?@ABCEFGHIJKLMNOQRSUVWXY[\\]^_abcdefghijklmnopqrstuvwxyz{|}~")) +;; "0123456789abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOQSUVWXY!#$%&'()*+,-./[]:;<=>?\\^_{}|")) + +(define (char-incr inchar) + (let* ((carry #f) + (next-char (let ((rem (member inchar string-num-chars))) + (if (eq? (length rem) 1) ;; we are at the last character in our string-num-chars list + (begin + (set! carry #t) + (car string-num-chars)) + (cadr rem))))) + (values next-char carry))) + +(define (increment-string str) + (if (string-null? str) + "0" + (let ((strlst (reverse (string->list str)))) ;; need to process the string from the lsd + (list->string + (let loop ((hed (car strlst)) + (tal (cdr strlst)) + (res '())) + (let-values (((newhed carry)(char-incr hed))) + ;; (print "newhed: " newhed " carry: " carry " tal: " tal) + (let ((newres (cons newhed res))) + (if carry ;; we'll have to propagate the carry + (if (null? tal) ;; at the end, tack on "0" (which is really a "1") + (cons (car string-num-chars) newres) + (loop (car tal)(cdr tal) newres)) + (append (reverse tal) newres))))))))) + +;;====================================================================== +;; P K T S D B I N T E R F A C E +;; +;; INTEGER, REAL, TEXT +;;====================================================================== +;; +;; spec +;; ( (tablename1 . (field1name L1 TYPE) +;; (field2name L2 TYPE) ... ) +;; (tablename2 ... )) +;; +;; Example: (tests (testname n TEXT) +;; (rundir r TEXT) +;; ... ) +;; +;; artifact keys are taken from the first letter, if that is not unique +;; then look at the next letter and so on +;; + +;; simplify frequent need to get one result with default +;; +(define (get-one db default qry . params) + (apply fold-row + car + default + db + qry + params)) + +(define (get-rows db qry . params) + (apply fold-row + cons + db + qry + params)) + +;; use this struct to hold the artifactspec and the db handle +;; +(defstruct artifactdb + (fname #f) + (artifactsdb-spec #f) + (artifactspec #f) ;; cache the artifactspec + (field-keys #f) ;; cache the field->key mapping (field1 . k1) ... + (key-fields #f) ;; cache the key->field mapping + (conn #f) + ) + +;; WARNING: There is a simplification in the artifactsdb spec w.r.t. artifactspec. +;; The field specs are the cdr of the table list - not a full +;; list. The extra list level in artifactspec is gratuitous and should +;; be removed. +;; +(define (artifactsdb-spec->artifactspec tables-spec) + (map (lambda (tablespec) + (list (car tablespec) + (map (lambda (field-spec) + (cons (car field-spec)(cadr field-spec))) + (cdr tablespec)))) + tables-spec)) + +(define (artifactsdb-open dbfname artifactsdb-spec) + (let* ((pdb (make-artifactdb)) + (dbexists (file-exists? dbfname)) + (db (open-database dbfname))) + (artifactdb-artifactsdb-spec-set! pdb artifactsdb-spec) + (artifactdb-artifactspec-set! pdb (artifactsdb-spec->artifactspec artifactsdb-spec)) + (artifactdb-fname-set! pdb dbfname) + (artifactdb-conn-set! pdb db) + (if (not dbexists) + (artifactsdb-init pdb)) + pdb)) + +(define (artifactsdb-init artifactsdb) + (let* ((db (artifactdb-conn artifactsdb)) + (artifactsdb-spec (artifactdb-artifactsdb-spec artifactsdb))) + ;; create a table for the artifacts themselves + (execute db "CREATE TABLE IF NOT EXISTS artifacts (id INTEGER PRIMARY KEY, zkey TEXT, record_id INTEGER, artifact TEXT);") + (for-each + (lambda (table) + (let* ((table-name (car table)) + (fields (cdr table)) + (stmt (conc "CREATE TABLE IF NOT EXISTS " + table-name + " (id INTEGER PRIMARY KEY," + (string-intersperse + (map (lambda (fieldspec) + (conc (car fieldspec) " " + (caddr fieldspec))) + fields) + ",") + ");"))) + (execute db stmt))) + artifactsdb-spec))) + +;; create artifact from the data and insert into artifacts table +;; +;; data is assoc list of (field . value) ... +;; tablename is a symbol matching the table name +;; +(define (artifactsdb-add-record artifactsdb tablename data #!optional (parent #f)) + (let*-values (((zkey artifact) (alist->artifact data (artifactdb-artifactspec artifactsdb) ptype: tablename))) + ;; have the data as alist so insert it into appropriate table also + (let* ((db (artifactdb-conn artifactsdb))) + ;; TODO: Address collisions + (execute db "INSERT INTO artifacts (zkey,artifact,record_id) VALUES (?,?,?);" + zkey artifact -1) + (let* (;; (artifactid (artifactsdb-artifactkey->artifactid artifactsdb artifactkey)) + (record-id (artifactsdb-insert artifactsdb tablename data))) + (execute db "UPDATE artifacts SET record_id=? WHERE zkey=?;" + record-id zkey) + )))) + +;; +(define (artifactsdb-insert artifactsdb tablename data) + (let* ((db (artifactdb-conn artifactsdb)) + (stmt (conc "INSERT INTO " tablename + " (" (string-intersperse (map conc (map car data)) ",") + ") VALUES ('" + ;; TODO: Add lookup of data type and do not + ;; wrap integers with quotes + (string-intersperse (map conc (map cdr data)) "','") + "');"))) + (print "stmt: " stmt) + (execute db stmt) + ;; lookup the record-id and return it + + )) + +(define (artifactsdb-close artifactsdb) + (finalize! (artifactdb-conn artifactsdb))) + +;; (let loop ((s "0")(n 0))(print s)(if (< n 5000)(loop (increment-string s)(+ n 1)))) + +;;====================================================================== +;; CARDS, MISC and UTIL +;;====================================================================== + +;; given string (likely multi-line) "dat" return shar1 hash +;; +(define (calc-sha1 instr) + (message-digest-string + (sha1-primitive) + instr)) + +;; given a single card return its type and value +;; +(define (card->type/value card) + (let ((ctype (substring card 0 1)) + (cval (substring card 2 (string-length card)))) + (values (string->symbol ctype) cval))) + +;;====================================================================== +;; SDAT procs +;; sdat is legacy/internal usage. Intention is to remove sdat calls from +;; the exposed calls. +;;====================================================================== + +;; sort list of cards +;; +(define-inline (sort-cards sdat) + (sort sdat string<=?)) + +;; artifact rules +;; 1. one card per line +;; 2. at least one card +;; 3. no blank lines + +;; given sdat, a list of cards return uuid, packet (as sdat) +;; +(define (add-z-card sdat) + (let* ((sorted-sdat (sort-cards sdat)) + (dat (string-intersperse sorted-sdat "\n")) + (uuid (calc-sha1 dat))) + (values + uuid + (conc + dat + "\nZ " + uuid)))) + +(define (check-artifact artifact) + (handle-exceptions + exn + #f ;; anything goes wrong - call it a crappy artifact + (let* ((sdat (string-split artifact "\n")) + (rdat (reverse sdat)) ;; reversed + (zdat (car rdat)) + (Z (cadr (string-split zdat))) + (cdat (string-intersperse (reverse (cdr rdat)) "\n"))) + (equal? Z (calc-sha1 cdat))))) + +;;====================================================================== +;; AARTIFACTs +;;====================================================================== + +;; convert a sdat (list of cards) to an alist +;; +(define (sdat->alist sdat) + (let loop ((hed (car sdat)) + (tal (cdr sdat)) + (res '())) + (let-values (( (ctype cval)(card->type/value hed) )) + ;; if this card is not one of the common ones tack it on to rem + (let* ((oldval (alist-ref ctype res)) + (newres (cons (cons ctype + (if oldval ;; list or string + (if (list? oldval) + (cons cval oldval) + (cons cval (list oldval))) + cval)) + res))) + (if (null? tal) + newres + (loop (car tal)(cdr tal) newres)))))) + +;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b") <= this is a the alist +;; (t . "v1.63/tip/dev") +;; (c . "QUICKPATT") +;; (T . "runstart") +;; (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd") +;; (D . "1488995096.0")) +;; (id . 8) +;; (group-id . 0) +;; (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b") +;; (parent . "") +;; (artifact-type . "runstart") +;; (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b")) +;; +;; artifactspec is alist of alists mapping types and nicekeys to keys +;; +;; '((posting . ((title . t) +;; (url . u) +;; (blurb . b))) +;; (comment . ((comment . c) +;; (score . s)))) + +;; DON'T USE? +;; +(define (get-value field dartifact . spec-in) + (if (null? spec-in) + (alist-ref field dartifact) + (let* ((spec (car spec-in)) + (aartifact (alist-ref 'aartifact dartifact))) ;; get the artifact alist + (if (and aartifact spec) + (let* ((ptype (alist-ref 'artifact-type dartifact)) + (pspec (alist-ref (string->symbol ptype) spec))) ;; do we have a spec for this type of artifact + (and pspec + (let* ((key (alist-ref field pspec))) + (and key (alist-ref key aartifact))))) + #f)))) + +;; convert a dartifact to a pure alist given a artifactspec +;; this flattens out the alist to include the data from +;; the queue database record +;; +(define (dartifact->alist dartifact artifactspec) + (let* ((aartifact (alist-ref 'aartifact dartifact)) + (artifact-type (or (alist-ref 'artifact-type dartifact) ;; artifact-type is from the database field artifact_type + (alist-ref 'T aartifact))) + (artifact-fields (alist-ref (string->symbol artifact-type) artifactspec)) + (rev-fields (if artifact-fields + (reverse-aspec artifact-fields) + '()))) + (append (map (lambda (entry) + (let* ((artifact-key (car entry)) + (new-key (or (alist-ref artifact-key rev-fields) artifact-key))) + `(,new-key . ,(cdr entry)))) + aartifact) + dartifact))) + +;; convert a list of dartifacts into a list of alists using artifact-spec +;; +(define (dartifacts->alists dartifacts artifact-spec) + (map (lambda (x) + (dartifact->alist x artifact-spec)) + dartifacts)) + +;; Generic flattener, make the tuple and artifact into a single flat alist +;; +;; qry-result-spec is a list of symbols corresponding to each field +;; +(define (flatten-all inlst artifactspec . qry-result-spec) + (map + (lambda (tuple) + (dartifact->alist + (apply dblst->dartifacts tuple qry-result-spec) + artifactspec)) + inlst)) + +;; call like this: +;; (construct-sdat 'a "a data" 'S "S data" ...) +;; returns list of cards +;; ( "A a value" "D 12345678900" ...) +;; +(define (construct-sdat . alldat) + (let ((have-D-card #f)) ;; flag + (if (even? (length alldat)) + (let loop ((type (car alldat)) + (data (cadr alldat)) + (tail (cddr alldat)) + (res '())) + (if (eq? type 'D)(set! have-D-card #t)) + (if (null? tail) + (if have-D-card ;; return the constructed artifact, add a D card if none found + (add-card res type data) + (add-card + (add-card res 'D (current-seconds)) + type data)) + (loop (car tail) + (cadr tail) + (cddr tail) + (add-card res type data)))) + #f))) ;; #f means it failed to create the sdat + +(define (construct-artifact . alldat) + (add-z-card + (apply construct-sdat alldat))) + +;;====================================================================== +;; CONVERTERS +;;====================================================================== + +(define (artifact->sdat artifact) + (map unescape-data (string-split artifact "\n"))) + +;; given a pure artifact return an alist +;; +(define (artifact->alist artifact #!key (artifactspec #f)) + (let ((sdat (cond + ((string? artifact) (artifact->sdat artifact)) + ((list? artifact) artifact) + (else #f)))) + (if artifact + (if artifactspec + (dartifact->alist (list (cons 'aartifact (sdat->alist sdat))) artifactspec) + (sdat->alist sdat)) + #f))) + +;; convert an alist to an sdat +;; in: '((a . "blah")(b . "foo")) +;; out: '("a blah" "b foo") +;; +(define (alist->sdat adat) + (map (lambda (dat) + (conc (car dat) " " (cdr dat))) + adat)) + +;; adat is the incoming alist, aspec is the mapping +;; from incoming key to the artifact key (usually one +;; letter to keep data tight) see the artifactspec at the +;; top of this file +;; +;; NOTE: alists can contain multiple instances of the same key (supported fine by artifacts) +;; but you (obviously I suppose) cannot use alist-ref to access those entries. +;; +(define (alist->artifact adat aspec #!key (ptype #f)(no-d #f)) + (let* ((artifact-type (or ptype + (alist-ref 'T adat) ;; can provide in the incoming alist + #f)) + (artifact-spec (if artifact-type ;; alist of external-key -> key + (or (alist-ref artifact-type aspec) '()) + (if (null? aspec) + '() + (cdar aspec)))) ;; default to first one if nothing specified + (new-alist (map (lambda (dat) + (let* ((key (car dat)) + (val (cdr dat)) + (newkey (or (alist-ref key artifact-spec) + key))) + (cons newkey (escape-data (conc val))))) ;; convert all incoming data (symbols, numbers etc.) to a string and then escape newlines. + adat)) + (new-with-type (if (alist-ref 'T new-alist) + new-alist + (cons `(T . ,artifact-type) new-alist))) + (with-d-card (if (or no-d ;; no timestamp wanted + (alist-ref 'D new-with-type)) + new-with-type + (cons `(D . ,(current-seconds)) + new-with-type)))) + (add-z-card + (alist->sdat with-d-card)))) + +;;====================================================================== +;; D B Q U E U E I N T E R F A C E +;;====================================================================== + +;; artifacts ( +;; id SERIAL PRIMARY KEY, +;; uuid TEXT NOT NULL, +;; parent_uuid TEXT default '', +;; artifact_type INTEGER DEFAULT 0, +;; group_id INTEGER NOT NULL, +;; artifact TEXT NOT NULL + +;; schema is list of SQL statements - can be used to extend db with more tables +;; +(define (open-queue-db dbpath dbfile #!key (schema '())) + (let* ((dbfname (conc dbpath "/" dbfile)) + (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f))) + (db (open-database dbfname))) + ;; (set-busy-handler! (dbi:db-conn db) (busy-timeout 10000)) + (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. + (for-each + (lambda (stmt) + (execute db stmt)) + (cons "CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER PRIMARY KEY, + group_id INTEGER NOT NULL, + uuid TEXT NOT NULL, + parent_uuid TEXT TEXT DEFAULT '', + artifact_type TEXT NOT NULL, + artifact TEXT NOT NULL, + processed INTEGER DEFAULT 0)" + schema))) ;; 0=not processed, 1=processed, 2... for expansion + db)) + +(define (add-to-queue db artifact uuid artifact-type parent-uuid group-id) + (execute db "INSERT INTO artifacts (uuid,parent_uuid,artifact_type,artifact,group_id) + VALUES(?,?,?,?,?);" ;; $1,$2,$3,$4,$5);" + uuid + (if parent-uuid parent-uuid "");; use null string as placeholder for no parent uuid. + (if artifact-type (conc artifact-type) "") + artifact + group-id)) + +;; given all needed parameters create a artifact and store it in the queue +;; procs is an alist that maps artifact-type to a function that takes a list of artifact params +;; in data and returns the uuid and artifact +;; +(define (create-and-queue conn procs artifact-type parent-uuid group-id data) + (let ((proc (alist-ref artifact-type procs))) + (if proc + (let-values (( (uuid artifact) (proc data) )) + (add-to-queue conn artifact uuid artifact-type parent-uuid group-id) + uuid) + #f))) + +;; given uuid get artifact, if group-id is specified use it (reduces probablity of +;; being messed up by a uuid collision) +;; +(define (lookup-by-uuid db artifact-uuid group-id) + (if group-id + (get-one db "SELECT artifact FROM artifacts WHERE group_id=? AND uuid=?;" group-id artifact-uuid) + (get-one db "SELECT artifact FROM artifacts WHERE uuid=?;" artifact-uuid))) + +;; find a packet by its id +;; +(define (lookup-by-id db id) + (get-one db "SELECT artifact FROM artifacts WHERE id=?;" id)) + + +;;====================================================================== +;; P R O C E S S P K T S +;;====================================================================== + +;; given a list of field values pulled from the queue db generate a list +;; of dartifact's +;; +(define (dblst->dartifacts lst . altmap) + (let* ((maplst (if (null? altmap) + '(id group-id uuid parent artifact-type artifact processed) + altmap)) + (res (map cons maplst lst))) ;; produces list of pairs, i.e an alist + (cons `(aartifact . ,(artifact->alist (alist-ref 'artifact res))) + res))) + +;; NB// ptypes is a list of symbols, '() or #f find all types +;; +(define (get-dartifacts db ptypes group-id parent-uuid #!key (uuid #f)) + (let* ((ptype-qry (if (and ptypes + (not (null? ptypes))) + (conc " IN ('" (string-intersperse (map conc ptypes) "','") "')") + (conc " LIKE '%' "))) + (rows (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE artifact_type " ptype-qry " AND group_id=? + AND processed=0 " + (if parent-uuid (conc "AND parent_uuid='" parent-uuid "' ") "") + (if uuid (conc "AND uuid='" uuid "' ") "") + "ORDER BY id DESC;") + group-id))) + (map dblst->dartifacts (map vector->list rows)))) + +;; get N artifacts not yet processed for group-id +;; +(define (get-not-processed-artifacts db group-id artifact-type limit offset) + (map dblst->dartifacts + (map vector->list + (get-rows + db + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE artifact_type = ? AND group_id = ? AND processed=0 + LIMIT ? OFFSET ?;" + (conc artifact-type) ;; convert symbols to string + group-id + limit + offset + )))) + +;; given a uuid, get not processed child artifacts +;; +(define (get-related db group-id uuid) + (map dblst->dartifacts + (get-rows + db + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts + WHERE parent_uuid=? AND group_id=? AND processed=0;" + uuid group-id))) + +;; generic artifact processor +;; +;; find all packets in group-id of type in ptypes and apply proc to artifactdat +;; +(define (process-artifacts conn group-id ptypes parent-uuid proc) + (let* ((artifacts (get-dartifacts conn ptypes group-id parent-uuid))) + (map proc artifacts))) + +;; criteria is an alist ((k . valpatt) ...) +;; - valpatt is a regex +;; - ptypes is a list of types (symbols expected) +;; match-type: 'any or 'all +;; +(define (find-artifacts db ptypes criteria #!key (processed #f)(match-type 'any)(artifact-spec #f)) ;; processed=#f, don't use, else use + (let* ((artifacts (get-dartifacts db ptypes 0 #f)) + (match-rules (lambda (artifactdat) ;; returns a list of matching rules + (filter (lambda (c) + ;; (print "c: " c) + (let* ((ctype (car c)) ;; card type + (rx (cdr c)) ;; card pattern + ;; (t (alist-ref 'artifact-type artifactdat)) + (artifact (alist-ref 'artifact artifactdat)) + (aartifact (artifact->alist artifact)) + (cdat (alist-ref ctype aartifact))) + ;; (print "cdat: " cdat) ;; " aartifact: " aartifact) + (if cdat + (string-match rx cdat) + #f))) + criteria))) + (res (filter (lambda (artifactdat) + (if (null? criteria) ;; looking for all artifacts + #t + (case match-type + ((any)(not (null? (match-rules artifactdat)))) + ((all)(eq? (length (match-rules artifactdat))(length criteria))) + (else + (print "ERROR: bad match type " match-type ", expecting any or all."))))) + artifacts))) + (if artifact-spec + (dartifacts->alists res artifact-spec) + res))) + +;; get descendents of parent-uuid +;; +;; NOTE: Should be doing something like the following: +;; +;; given a uuid, get not processed child artifacts +;; processed: +;; #f => get all +;; 0 => get not processed +;; 1 => get processed +;; +(define (get-ancestors db group-id uuid #!key (processed #f)) + (map dblst->dartifacts + (map vector->list + (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed + FROM artifacts + WHERE uuid IN + (WITH RECURSIVE + tree(uuid,parent_uuid) + AS + ( + SELECT uuid, parent_uuid + FROM artifacts + WHERE uuid = ? + UNION ALL + SELECT t.uuid, t.parent_uuid + FROM artifacts t + JOIN tree ON t.uuid = tree.parent_uuid + ) + SELECT uuid FROM tree) + AND group_id=?" (if processed (conc " AND processed=" processed) "") ";") + uuid group-id)))) + +;; Untested +;; +(define (get-descendents db group-id uuid #!key (processed #f)) + (map dblst->dartifacts + (map vector->list + (get-rows + db + (conc + "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed + FROM artifacts + WHERE uuid IN + (WITH RECURSIVE + tree(uuid,parent_uuid) + AS + ( + SELECT uuid, parent_uuid + FROM artifacts + WHERE uuid = ? + UNION ALL + SELECT t.uuid, t.parent_uuid + FROM artifacts t + JOIN tree ON t.parent_uuid = tree.uuid + ) + SELECT uuid FROM tree) + AND group_id=?" (if processed (conc " AND processed=" processed) "") ";") + uuid group-id)))) + +;; look up descendents based on given info unless passed in a list via inlst +;; +;; (define (get-last-descendent db group-id uuid #!key (processed #f)(inlst #f)) +;; (let ((descendents (or inlst (get-descendents db group-id uuid processed: processed)))) +;; (if (null? descendents) +;; #f +;; (last descendents)))) + +;;====================================================================== +;; A R C H I V E S - always to a sqlite3 db +;;====================================================================== + +;; open an archive db +;; path: archive-dir//month.db +;; +#;(define (archive-open-db archive-dir) + (let* ((curr-time (seconds->local-time (current-seconds))) + (dbpath (conc archive-dir "/" (time->string curr-time "%Y"))) + (dbfile (conc dbpath "/" (time->string curr-time "%m") ".db")) + (dbexists (if (file-exists? dbfile) #t (begin (create-directory dbpath #t) #f)))) + (let ((db (open-database dbfile))) + ;; (set-busy-handler! db (busy-timeout 10000)) + (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. + (execute db "CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER, + group_id INTEGER, + uuid TEXT, + parent_uuid TEXT, + artifact_type TEXT, + artifact TEXT, + processed INTEGER DEFAULT 0)")) + db))) + +;; turn on transactions! otherwise this will be painfully slow +;; +#;(define (write-archive-artifacts src-db db artifact-ids) + (let ((artifacts (get-rows + src-db + (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact FROM artifacts WHERE id IN (" + (string-intersperse (map conc artifact-ids) ",") ")")))) + ;; (dbi:with-transaction + ;; db + (lambda () + (for-each + (lambda (artifact) + (apply execute db "INSERT INTO artifacts (id,group_id,uuid,parent_uuid,artifact_type,artifact) + VALUES (?,?,?,?,?,?)" + artifact)) + artifacts)))) ;; ) + +;; given a list of uuids and lists of uuids move all to +;; the sqlite3 db for the current archive period +;; +#;(define (archive-artifacts conn artifact-ids archive-dir) + (let ((db (archive-open-db archive-dir))) + (write-archive-artifacts conn db artifact-ids) + (finalize! db)) + ;; (pg:with-transaction + ;; conn + ;; (lambda () + (for-each + (lambda (id) + (get-one + conn + "DELETE FROM artifacts WHERE id=?" id)) + artifact-ids)) ;; )) + +;; given a list of ids mark all as processed +;; +(define (mark-processed conn artifact-ids) + ;; (pg:with-transaction + ;; conn + ;; (lambda () + (for-each + (lambda (id) + (get-one + conn + "UPDATE artifacts SET processed=1 WHERE id=?;" id)) + artifact-ids)) ;; x)) + +;; a generic artifact getter, gets from the artifacts db +;; +(define (get-artifacts conn ptypes) + (let* ((ptypes-str (if (null? ptypes) + "" + (conc " WHERE artifact_type IN ('" (string-intersperse ptypes ",") "') "))) + (qry-str (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts" ptypes-str))) + (map vector->list (get-rows conn qry-str)))) + +;; make a report of the artifacts in the db +;; ptypes of '() gets all artifacts +;; display-fields +;; +(define (make-report dest conn artifactspec display-fields . ptypes) + (let* (;; (conn (dbi:db-conn (s:db))) + (all-rows (get-artifacts conn ptypes)) + (all-artifacts (flatten-all + all-rows + artifactspec + 'id 'group-id 'uuid 'parent 'artifact-type 'artifact 'processed)) + (by-uuid (let ((ht (make-hash-table))) + (for-each + (lambda (artifact) + (let ((uuid (alist-ref 'uuid artifact))) + (hash-table-set! ht uuid artifact))) + all-artifacts) + ht)) + (by-parent (let ((ht (make-hash-table))) + (for-each + (lambda (artifact) + (let ((parent (alist-ref 'parent artifact))) + (hash-table-set! ht parent (cons artifact (hash-table-ref/default ht parent '()))))) + all-artifacts) + ht)) + (oup (if dest (open-output-file dest) (current-output-port)))) + + (with-output-to-port + oup + (lambda () + (print "digraph megatest_state_status { + // ranksep=0.05 + rankdir=LR; + node [shape=\"box\"]; +") + ;; first all the names + (for-each + (lambda (artifact) + (let* ((uuid (alist-ref 'uuid artifact)) + (shortuuid (substring uuid 0 4)) + (type (alist-ref 'artifact-type artifact)) + (processed (alist-ref 'processed artifact))) + + (print "\"" uuid "\" [label=\"" shortuuid ", (" + type ", " + (if processed "processed" "not processed") ")") + (for-each + (lambda (key-field) + (let ((val (alist-ref key-field artifact))) + (if val + (print key-field "=" val)))) + display-fields) + (print "\" ];"))) + all-artifacts) + ;; now for parent-child relationships + (for-each + (lambda (artifact) + (let ((uuid (alist-ref 'uuid artifact)) + (parent (alist-ref 'parent artifact))) + (if (not (equal? parent "")) + (print "\"" parent "\" -> \"" uuid"\";")))) + all-artifacts) + + (print "}") + )) + (if dest + (begin + (close-output-port oup) + (system "dot -Tpdf out.dot -o out.pdf"))) + + )) + +;;====================================================================== +;; Read ref artifacts into a vector < laststr hash table > +;;====================================================================== + + + +;;====================================================================== +;; Read/write packets to files (convience functions) +;;====================================================================== + +;; write alist to a artifact file +;; +(define (write-alist->artifact targdir dat #!key (artifactspec '())(ptype #f)) + (let-values (((uuid artifact)(alist->artifact dat artifactspec ptype: ptype))) + (with-output-to-file (conc targdir "/" uuid ".artifact") + (lambda () + (print artifact))) + uuid)) ;; return the uuid + +;; read artifact into alist +;; +(define (read-artifact->alist artifact-file #!key (artifactspec #f)) + (artifact->alist (with-input-from-file + artifact-file + read-string) + artifactspec: artifactspec)) + +;;====================================================================== +;; File utils, stuff useful for file management +;;====================================================================== + +(define (file-get-sha1 fname) + (let* ((sha1-res (run/strings (sha1sum ,fname)))) + (car (string-split (car sha1-res))))) + +(define (link-or-copy srcf destf) + (or (handle-exceptions + exn + #f + (file-link srcf destf)) + (if (file-exists? destf) + (print "NOTE: destination already exists, skipping copy.") + (copy-file srcf destf)))) + +;; (define (files-diff file1 file2) +;; (let* ((diff-res (with-input-from-port +;; (run/port (diff "-q" ,file1 ,file2)) +;; (lambda () +;; (let* ((res (read-line))) +;; (read-lines) +;; res))))) +;; (car (string-split sha1-res)))) +;; + + +(define (check-same file1 file2) + (cond + ((not (and (file-exists? file1)(file-exists? file2))) #f) + ((not (equal? (file-size file1)(file-size file2))) #f) + (else + (let-values (((status run-ok process-id) + (run (diff "-q" ,file1 ,file2)))) + status)))) + +(define *pcache* (make-hash-table)) +(define (get-device dir) + (let ((indat (or (hash-table-ref/default *pcache* dir #f) + (let* ((inp (open-input-pipe (conc "df --output=source \""dir"\""))) + (res (read-lines inp))) + (close-input-port inp) + (hash-table-set! *pcache* dir res) + res)))) + (cadr indat))) + +(define (same-partition? dir1 dir2) + (equal? (get-device dir1)(get-device dir2))) + +(define (link-if-same-partition file1 file2) + (let* ((dir1 (pathname-directory file1)) + (dir2 (pathname-directory file2)) + (f1 (pathname-file file1)) + (f2 (pathname-file file2))) + (if (same-partition? dir1 dir2) + (let* ((tmpname (conc "."f2"-"(current-seconds)))) + ;; this steps needs to be executed as actual user + (move-file file2 (conc dir1 "/" tmpname)) + (file-link file1 file2) + (delete-file (conc dir1 "/" tmpname)))))) + +(define (uuid-first-two-letters sha1sum) + (substring sha1sum 0 2)) + +(define (uuid-remaining-letters sha1sum) + (let ((slen (string-length sha1sum))) + (substring sha1sum 2 slen))) + +(define (archive-dest destd sha1sum) + (let* ((subdir (uuid-first-two-letters sha1sum)) ;; (substring sha1sum 0 2)) + ;; (slen (string-length sha1sum)) + (rem sha1sum #;(uuid-remaining-letters sha1sum)) ;; (substring sha1sum 3 slen)) + (full-dest-dir (conc destd"/"subdir)) + (full-dest-file (conc full-dest-dir"/"rem))) + (if (not (directory-exists? full-dest-dir)) + (create-directory full-dest-dir #t)) + full-dest-file)) + +(define (write-to-archive data destd #!optional (nextnum #f)) + (let* ((sha1sum (calc-sha1 data)) + (full-dest (conc (archive-dest destd sha1sum) + (if nextnum (conc "."nextnum) "")))) + (if (file-exists? full-dest) + (if (equal? (string-intersperse (with-input-from-file full-dest read-lines) "\n") + data) + (begin + ;; (print "INFO: data already exists in "full-dest" and is identical") + sha1sum) + (let ((nextnum (if nextnum (+ nextnum 1) 0))) + (print "WARN: data already exists in "full-dest" but is different! Trying again...") + (write-to-archive data destd nextnum))) + (begin + (with-output-to-file + full-dest + (lambda () + (print data))) + sha1sum)))) ;; BUG? Does print munge data? + +;; copy srcf with sha1sum aabc... to aa/bc... +;; +(define (archive-copy srcf destd sha1sum) + (let* ((full-dest-file (archive-dest destd sha1sum))) + (let loop ((trynum 0)) + (let ((dest-name (if (> trynum 0) + (conc full-dest-file"-"trynum) + full-dest-file))) + (cond + ((not (file-exists? srcf)) #f) ;; this should be an error? + ((and (file-exists? srcf) + (file-exists? dest-name)) + (if (check-same srcf dest-name) + (link-if-same-partition dest-name srcf) + (loop (+ trynum 1)))) ;; collisions are rare, this protects against them + ((not (file-exists? dest-name)) + (link-or-copy srcf dest-name)) + (else #f)))))) + +;; multi-glob +(define (multi-glob globstrs inpath) + ;; (print "multi-glob: "globstrs", "inpath) + (if (equal? inpath "") + globstrs + (let* ((parts (string-split inpath "/" #t)) + (nextpart (car parts)) + (remaining (string-intersperse (cdr parts) "/"))) + (if (and (equal? nextpart "") ;; this must be a leading / meaning root directory + (null? globstrs)) + (multi-glob '("/") remaining) + (begin + ;; (print "nextpart="nextpart", remaining="remaining) + (apply append + (map (lambda (gstr) + (let* ((pathstr (conc gstr"/"nextpart)) + (pathstrs (glob pathstr))) + ;; (print "pathstr="pathstr) + (multi-glob pathstrs remaining))) + globstrs))))))) + + +;; perm[/user:group]: +;; DDD - octal perm (future expansion) +;; - - use umask/defacto perms (i.e. don't actively do anything) +;; x - mark as executable +;; +;; Cards: +;; file: f perm fname +;; directory: d perm fname artifactid +;; link: l perm lname destpath +;; +;; NOTE: cards are kept as (C . "value") +;; +;; given a directory path, ignore list and artifact store (hash-table): +;; 1. create sha1 tree at dest (e.g. aa/b3a7 ...) +;; 2. create artifact for each dir +;; - cards for all files +;; - cards for files that are symlinks or executables +;; 3. return (artifactid . artifact) +;; +;; NOTES: +;; Use destdir of #f to not create sha1 tree +;; Hard links will be used if srcdir and destdir appear to be same partion +;; +;; (alist->artifact adat aspec #!key (ptype #f)) +;; +;; +;; (load "../../artifacts/artifacts.scm")(import big-chicken srfi-69 artifacts)(define dirdat (make-hash-table)) +;; (capture-dir ".." ".." "/tmp/junk" '() dirdat) +;; +;; [procedure] (file-type FILE [LINK [ERROR]]) +;; Returns the file-type for FILE, which should be a filename, a file-descriptor or a port object. If LINK is given and true, symbolic-links are not followed: +;; +;; regular-file +;; directory +;; fifo +;; socket +;; symbolic-link +;; character-device +;; block-device +;; Note that not all types are supported on every platform. If ERROR is given and false, then file-type returns #f if the file does not exist; otherwise, it signals an error. +;; +;; +(define (capture-dir curr-dir src-dir dest-dir ignore-list artifacts all-seen) + (let* ((dir-dat (directory-fold + (lambda (fname res) ;; res is a list of artifact cards + (let* ((fullname (conc curr-dir"/"fname))) + ;; (print "INFO: processing "fullname) + (if (hash-table-ref/default all-seen fullname #f) ;; something circular going on + (begin + (print "WARNING: possible circular link(s) "fullname) + res) + (let* ((ftype (file-type fullname #t #f))) + (hash-table-set! all-seen fullname ftype) + (cons + (case ftype ;; get the card + ((directory) ;; (directory? fullname) + (let* ((new-curr-dir (conc curr-dir"/"fname)) + (new-src-dir (conc src-dir"/"fname))) + (let* ((dir-dat (capture-dir new-curr-dir new-src-dir + dest-dir ignore-list artifacts all-seen)) + (a-id (car dir-dat)) + (artf (cdr dir-dat))) + (hash-table-set! artifacts a-id artf) + (cons 'd (conc "- "a-id" "fname))))) ;; the card + ((symbolic-link) ;; (symbolic-link? fullname) + (let ((ldest (read-symbolic-link fullname))) + (cons 'l (conc "- "fname"/"ldest)))) ;; delimit link name from dest with / + ((regular-file) ;; must be a file + (let* ((start (current-seconds)) + (sha1sum (file-get-sha1 fullname)) + (perms (if (file-executable? fullname) "x" "-"))) + (let ((runtime (- (current-seconds) start))) + (if (> runtime 1) + (print "INFO: file "fullname" took "runtime" seconds to calculate sha1."))) + (if dest-dir + (archive-copy fullname dest-dir sha1sum)) + (cons 'f (conc perms " "sha1sum" "fname)))) + (else + (print "WARNING: file "fullname" of type "ftype" is NOT supported and will converted to empty file.") + (let* ((sha1sum (write-to-archive "" dest-dir))) + (cons 'f (conc "- "sha1sum" "fname))))) + res))))) + '() src-dir #:dotfiles? #t))) ;; => (values srcdir_artifact sub_artifacts_list) + ;; (print "dir-dat: " dir-dat) + (let-values (((a-id artf) + (alist->artifact dir-dat '() ptype: 'd no-d: #t))) + (hash-table-set! artifacts a-id artf) + (cons a-id artf)))) + +;; maybe move this into artifacts? +;; +;; currently moves *.artifact into a bundle and moves the artifacts into attic +;; future: move artifacts under 1 meg in size into bundle up to 10 meg in size +;; +(define (artifact-rollup bundle-dir) ;; cfg storepath) + ;; (let* ((bundle-dir (calc-bundle-dir cfg storepath))) + (let* ((bundles (glob (conc bundle-dir"/*.bundle"))) + (artifacts (glob (conc bundle-dir"/*.artifact")))) + (if (> (length artifacts) 30) ;; rollup only if > 30 artifacts + ;; if we have unbundled artifacts, bundle them + (let* ((ht (read-artifacts-into-hash #f artifacts: artifacts)) + (bundle (hash-of-artifacts->bundle ht))) + (write-bundle bundle bundle-dir) + (create-directory (conc bundle-dir"/attic") #t) + (for-each + (lambda (full-fname) + (let* ((fname (pathname-strip-directory full-fname)) + (newname (conc bundle-dir"/attic/"fname))) + (move-file full-fname newname #t))) + artifacts) + (conc "bundled "(length artifacts))) + "not enough artifacts to bundle"))) + +;; if destfile is a directory then calculate the sha1sum of the bundle and store it +;; by .bundle +;; +;; incoming dat is pure text (bundle already sorted and appended: +;; +(define (write-bundle bdl-data destdir) + (let* ((bdl-uuid (calc-sha1 bdl-data))) + (with-output-to-file + (conc destdir"/"bdl-uuid".bundle") + (lambda () + (print bdl-data))))) + +;; minimal (and hopefully fast) artifact reader +;; TODO: Add check of shar sum. +;; +(define (minimal-artifact-read fname) + (let* ((indat (with-input-from-file fname read-lines))) + (if (null? indat) + (values #f (conc "did not find an artifact in "fname)) + (let* ((zcard (last indat)) + (cardk (substring zcard 0 1)) + (cardv (substring zcard 2 (string-length zcard)))) + (if (equal? cardk "Z") + (values cardv (string-intersperse indat "\n")) + (values #f (conc fname" is not a valid artifact"))))))) + +;; read artifacts from directory into hash +;; NOTE: support for max-count not implemented yet +;; +(define (read-artifacts-into-hash dir #!key (artifacts #f) (max-count #f)(ht #f)) + (let* ((artifacts (or artifacts + (glob (conc dir"/*.artifact")))) + (ht (or ht (make-hash-table)))) + (for-each + (lambda (fname) + (let-values (((uuid afct) + (minimal-artifact-read fname))) + (hash-table-set! ht uuid afct))) + artifacts) + ht)) + +;; ht is: +;; uuid => artifact text +;; use write-bundle to put result into a bundle file +;; +(define (hash-of-artifacts->bundle ht) + (fold (lambda (k res) + (let* ((v (hash-table-ref ht k))) + (if res + (conc res"\n"v) + v))) + #f + (sort (hash-table-keys ht) string<=?))) + +;; minimal artifact to alist +;; +(define (minimal-artifact->alist afact) + (let* ((lines (string-split afact "\n"))) + (map (lambda (a) + (let* ((key (string->symbol (substring a 0 1))) + (sl (string-length a)) + (val (if (> sl 2) + (substring a 2 sl) + ""))) + (cons key val))) + lines))) + +;; some accessors for common cards +(define (afact-get-D afact) + (let ((dval (alist-ref 'D afact))) + (if dval + (string->number dval) + #f))) + +(define (afact-get-T afact) ;; get the artifact type as a symbol + (let ((val (alist-ref 'T afact))) + (if val + (string->symbol val) + val))) + +(define (afact-get-Z afact) + (alist-ref 'Z afact)) + +(define (afact-get afact key default) + (or (alist-ref key afact) + default)) + +(define (afact-get-number/default afact key default) + (let ((val (alist-ref key afact))) + (if val + (or (string->number val) default) ;; seems wrong + default))) + +;; bundles are never big and reading into memory for processing is fine +;; +(define (read-bundle srcfile #!optional (mode 'uuid-raw)) + (let* ((indat (with-input-from-file srcfile read-lines))) + (let loop ((tail indat) + (dat '()) ;; artifact being extracted + (res '())) ;; list of artifacts + (if (null? tail) + (reverse res) ;; last dat should be empty list + (let* ((curr-line (car tail))) + (let-values (((ctype cdata) + (card->type/value curr-line))) + (let* ((is-z-card (eq? 'Z ctype)) + (new-dat (cons (case mode + ((uuid-raw) curr-line) + (else (cons ctype cdata))) + dat))) + (if is-z-card + (loop (cdr tail) ;; done with this artifact + '() + (cons (case mode + ((uuid-raw) (cons cdata (string-intersperse (reverse new-dat) "\n"))) + (else (reverse new-dat))) + res)) + (loop (cdr tail) + new-dat + res))))))))) + + +;; find all .bundle and .artifacts files in bundle-dir +;; and inport them into sqlite handle adb +;; +(define (refresh-artifacts-db adb bundle-dir) + (let* ((bundles (glob (conc bundle-dir"/*.bundle"))) + (artifacts (glob (conc bundle-dir"/*.artifact"))) + (uuids (get-all-uuids adb 'hash))) + (with-transaction + adb + (lambda () + (for-each + (lambda (bundle-file) + ;; (print "Importing artifacts from "bundle-file) + (let* ((bdat (read-bundle bundle-file 'uuid-raw)) + (count 0) + (inc (lambda ()(set! count (+ count 1))))) + (for-each + (lambda (adat) + (match + adat + ((zval . artifact) + (if (not (hash-table-exists? uuids zval)) + (begin + ;; (print "INFO: importing new artifact "zval" from bundle "bundle-file) + (inc) + (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);" + zval artifact) + (hash-table-set! uuids zval #t)))) + (else + (print "ERROR: Bad artifact data "adat)))) + bdat) + (print "INFO: imported "count" artifacts from "bundle-file))) + bundles) + (for-each + (lambda (artifact-file) + ;; (print "Importing artifact from "artifact-file) + (let-values (((uuid artifact) (minimal-artifact-read artifact-file))) + (if uuid + (if (not (hash-table-exists? uuids uuid)) + (begin + ;; (print "INFO: importing new artifact "uuid" from "artifact-file) + (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);" + uuid artifact) + (hash-table-set! uuids uuid #t))) + (print "Bad artifact in "artifact-file)))) + artifacts))))) + +;;====================================================================== +;; Artifacts db cache +;;====================================================================== + +;; artifacts +;; id SERIAL PRIMARY KEY, +;; uuid TEXT NOT NULL, +;; artifact TEXT NOT NULL +;; +;; parents +;; id INTEGER REFERENCES artids.id, -- +;; parent_id REFERENCES artids.id +;; +;; schema is list of SQL statements - can be used to extend db with more tables +;; +(define (open-artifacts-db dbpath dbfile #!key (schema '())) + (let* ((dbfname (conc dbpath "/" dbfile)) + (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f))) + (adb (open-database dbfname))) + (set-busy-handler! adb (make-busy-timeout 10000)) + (execute adb "PRAGMA synchronous = 0;") + (if (not dbexists) + (with-transaction + adb + (lambda () + (for-each + (lambda (stmt) + (execute adb stmt)) + (append `("CREATE TABLE IF NOT EXISTS artifacts + (id INTEGER PRIMARY KEY, + uuid TEXT NOT NULL, + artifact TEXT NOT NULL)" + + "CREATE TABLE IF NOT EXISTS parents + (id INTEGER REFERENCES artifacts(id) NOT NULL, + parent_id INTEGER REFERENCES artifacts(id) NOT NULL)") + schema))))) + adb)) + +(define (generate-year-month-name #!optional (seconds #f)) + (let* ((curr-time (seconds->local-time (or seconds (current-seconds))))) + (time->string curr-time "%Y%m"))) + +;; I don't like this function. TODO: remove the +;; mode and option to return ht. Use instead the +;; get-all-artifacts below +;; +(define (get-all-uuids adb #!optional (mode #f)) + (let* ((res (fold-row + (lambda (res uuid) + (cons uuid res)) + '() + adb + "SELECT uuid FROM artifacts;"))) + (case mode + ((hash) + (let* ((ht (make-hash-table))) + (for-each + (lambda (uuid) + (hash-table-set! ht uuid #t)) + res) + ht)) + (else res)))) + +;; returns raw artifacts (i.e. NOT alists but instead plain text) +(define (get-all-artifacts adb) + (let* ((ht (make-hash-table))) + (for-each-row + (lambda (id uuid artifact) + (hash-table-set! ht uuid `(,id ,uuid ,artifact))) + adb + "SELECT id,uuid,artifact FROM artifacts;") + ht)) + +;; given a bundle-dir copy or create to /tmp and open +;; the YYMM.db file and hand the handle to the given proc +;; NOTE: we operate in /tmp/ to accomodate users on NFS +;; where slamming Unix locks at an NFS filer can cause +;; locking fails. Eventually this /tmp behavior will be +;; configurable. +;; +(define (with-todays-adb bundle-dir proc) + (let* ((dbname (conc (generate-year-month-name) ".db")) + (destname (conc bundle-dir"/"dbname)) + (tmparea (conc "/tmp/"(current-user-name)"-"(calc-sha1 bundle-dir))) + (tmpname (conc tmparea"/"dbname)) + (lockfile (conc destname".update-in-progress"))) + ;; (print "with-todays-adb, bundle-dir: "bundle-dir", dbname: "dbname", destname: "destname",\n tmparea: " tmparea", lockfile: "lockfile) + (if (not (file-exists? tmparea))(create-directory tmparea #t)) + (let loop ((count 0)) + (if (file-exists? lockfile) + (if (< count 30) ;; aproximately 30 seconds + (begin + (sleep 1) + (loop (+ 1 count))) + (print "ERROR: "lockfile" exists, proceeding anyway")) + (if (file-exists? destname) + (begin + (copy-file destname tmpname #t) + (copy-file destname lockfile #t))))) + (let* ((adb (open-artifacts-db tmparea dbname)) + (res (proc adb))) + (finalize! adb) + (copy-file tmpname destname #t) + (delete-file* lockfile) + res))) + +) ;; module artifacts + +;; ATTIC + ADDED artifacts/artifacts.setup Index: artifacts/artifacts.setup ================================================================== --- /dev/null +++ artifacts/artifacts.setup @@ -0,0 +1,11 @@ +;; Copyright 2007-2017, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;;; pkts.setup +(standard-extension 'pkts "1.0") ADDED artifacts/artifactsrec.scm Index: artifacts/artifactsrec.scm ================================================================== --- /dev/null +++ artifacts/artifactsrec.scm @@ -0,0 +1,196 @@ +(define-syntax define-record-type + (syntax-rules () + ((define-record-type type + (constructor constructor-tag ...) + predicate + (field-tag accessor . more) ...) + (begin + (define type + (make-record-type 'type '(field-tag ...))) + (define constructor + (record-constructor type '(constructor-tag ...))) + (define predicate + (record-predicate type)) + (define-record-field type field-tag accessor . more) + ...)))) + +; An auxilliary macro for define field accessors and modifiers. +; This is needed only because modifiers are optional. + +(define-syntax define-record-field + (syntax-rules () + ((define-record-field type field-tag accessor) + (define accessor (record-accessor type 'field-tag))) + ((define-record-field type field-tag accessor modifier) + (begin + (define accessor (record-accessor type 'field-tag)) + (define modifier (record-modifier type 'field-tag)))))) + +; Record types + +; We define the following procedures: +; +; (make-record-type ) -> +; (record-constructor ) -> +; (record-predicate ) -> +; (record-accessor ) -> +; (record-modifier ) -> +; where +; ( ...) -> +; ( ) -> +; ( ) -> +; ( ) -> + +; Record types are implemented using vector-like records. The first +; slot of each record contains the record's type, which is itself a +; record. + +(define (record-type record) + (record-ref record 0)) + +;---------------- +; Record types are themselves records, so we first define the type for +; them. Except for problems with circularities, this could be defined as: +; (define-record-type :record-type +; (make-record-type name field-tags) +; record-type? +; (name record-type-name) +; (field-tags record-type-field-tags)) +; As it is, we need to define everything by hand. + +(define :record-type (make-record 3)) +(record-set! :record-type 0 :record-type) ; Its type is itself. +(record-set! :record-type 1 ':record-type) +(record-set! :record-type 2 '(name field-tags)) + +; Now that :record-type exists we can define a procedure for making more +; record types. + +(define (make-record-type name field-tags) + (let ((new (make-record 3))) + (record-set! new 0 :record-type) + (record-set! new 1 name) + (record-set! new 2 field-tags) + new)) + +; Accessors for record types. + +(define (record-type-name record-type) + (record-ref record-type 1)) + +(define (record-type-field-tags record-type) + (record-ref record-type 2)) + +;---------------- +; A utility for getting the offset of a field within a record. + +(define (field-index type tag) + (let loop ((i 1) (tags (record-type-field-tags type))) + (cond ((null? tags) + (error "record type has no such field" type tag)) + ((eq? tag (car tags)) + i) + (else + (loop (+ i 1) (cdr tags)))))) + +;---------------- +; Now we are ready to define RECORD-CONSTRUCTOR and the rest of the +; procedures used by the macro expansion of DEFINE-RECORD-TYPE. + +(define (record-constructor type tags) + (let ((size (length (record-type-field-tags type))) + (arg-count (length tags)) + (indexes (map (lambda (tag) + (field-index type tag)) + tags))) + (lambda args + (if (= (length args) + arg-count) + (let ((new (make-record (+ size 1)))) + (record-set! new 0 type) + (for-each (lambda (arg i) + (record-set! new i arg)) + args + indexes) + new) + (error "wrong number of arguments to constructor" type args))))) + +(define (record-predicate type) + (lambda (thing) + (and (record? thing) + (eq? (record-type thing) + type)))) + +(define (record-accessor type tag) + (let ((index (field-index type tag))) + (lambda (thing) + (if (and (record? thing) + (eq? (record-type thing) + type)) + (record-ref thing index) + (error "accessor applied to bad value" type tag thing))))) + +(define (record-modifier type tag) + (let ((index (field-index type tag))) + (lambda (thing value) + (if (and (record? thing) + (eq? (record-type thing) + type)) + (record-set! thing index value) + (error "modifier applied to bad value" type tag thing))))) + +Records + +; This implements a record abstraction that is identical to vectors, +; except that they are not vectors (VECTOR? returns false when given a +; record and RECORD? returns false when given a vector). The following +; procedures are provided: +; (record? ) -> +; (make-record ) -> +; (record-ref ) -> +; (record-set! ) -> +; +; These can implemented in R5RS Scheme as vectors with a distinguishing +; value at index zero, providing VECTOR? is redefined to be a procedure +; that returns false if its argument contains the distinguishing record +; value. EVAL is also redefined to use the new value of VECTOR?. + +; Define the marker and redefine VECTOR? and EVAL. + +(define record-marker (list 'record-marker)) + +(define real-vector? vector?) + +(define (vector? x) + (and (real-vector? x) + (or (= 0 (vector-length x)) + (not (eq? (vector-ref x 0) + record-marker))))) + +; This won't work if ENV is the interaction environment and someone has +; redefined LAMBDA there. + +(define eval + (let ((real-eval eval)) + (lambda (exp env) + ((real-eval `(lambda (vector?) ,exp)) + vector?)))) + +; Definitions of the record procedures. + +(define (record? x) + (and (real-vector? x) + (< 0 (vector-length x)) + (eq? (vector-ref x 0) + record-marker))) + +(define (make-record size) + (let ((new (make-vector (+ size 1)))) + (vector-set! new 0 record-marker) + new)) + +(define (record-ref record index) + (vector-ref record (+ index 1))) + +(define (record-set! record index value) + (vector-set! record (+ index 1) value)) ADDED artifacts/tests/run.scm Index: artifacts/tests/run.scm ================================================================== --- /dev/null +++ artifacts/tests/run.scm @@ -0,0 +1,139 @@ +(use test) + +;; (use (prefix pkts pkts:)) +(use pkts (prefix dbi dbi:)) +;; (use trace)(trace sdat->alist pkt->alist) + +(if (file-exists? "queue.db")(delete-file "queue.db")) + +(test-begin "pkts and pkt archives") + +;;====================================================================== +;; Basic pkt creation, parsing and conversion routines +;;====================================================================== + +(test-begin "basic packets") +(test #f '(A "This is a packet") (let-values (((t v) + (card->type/value "A This is a packet"))) + (list t v))) +(test #f "A A\nZ 664449e7299e0065a3e25c138ccef2df13ba291e" + (let-values (((uuid res) + (add-z-card '("A A")))) + res)) +(test #f '("CC C++" "D 1486332719.0" "a A" "b C")(sort (construct-sdat 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0) + string<=?)) +(define pkt-example #f) +(test #f "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" + (let-values (((uuid res) + (construct-pkt 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0))) + (set! pkt-example (cons uuid res)) + res)) +(test-end "basic packets") + +;;====================================================================== +;; Sqlite and postgresql based queue of pkts +;;====================================================================== + +(test-begin "pkt queue") +(define db #f) +(test #f 'sqlite3 (let ((dbh (open-queue-db "." "queue.db"))) + (set! db dbh) + (dbi:db-dbtype dbh))) +(test #f (cdr pkt-example) + (begin + (add-to-queue db (cdr pkt-example)(car pkt-example) 'basic #f 0) + (lookup-by-uuid db (car pkt-example) 0))) +(test #f (cdr pkt-example) + (lookup-by-id db 1)) +(test #f 1 (length (find-pkts db '(basic) '()))) + +(test-end "pkt queue") + + +;;====================================================================== +;; Process groups of pkts +;;====================================================================== + +(test-begin "lists of packets") +(test #f '((apkt . #f) (id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) ;; ((id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) + (dblst->dpkts '(1 2 3 4 5))) +(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0))) + ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + (get-dpkts db '(basic) 0 #f)) +(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0))) + ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + (get-not-processed-pkts db 0 'basic 1000 0)) +(test-end "lists of packets") + +(test-begin "pkts as alists") +(define pktspec '((posting . ((title . t) ;; NOTE: same as ((posting (title . t)(url . u)(blub . b)) ... + (url . u) + (blurb . b))) + (comment . ((comment . c) + (score . s))) + (basic . ((b-field . b) + (a-field . a))))) +(define pktlst (find-pkts db '(basic) '())) +(define dpkt (car pktlst)) +(test #f "A" (get-value 'a-field dpkt pktspec)) + +(test #f "C" (alist-ref 'b-field (dpkt->alist dpkt pktspec))) + +(define basic-spec '((nada (foo . b)(bar . f))(basic (foo . f)(bar . b)))) +(define test-pkt '((foo . "fooval")(bar . "barval"))) +(let*-values (((u p) (alist->pkt test-pkt basic-spec ptype: 'basic)) + ((apkt) (pkt->alist p)) + ((bpkt) (pkt->alist p pktspec: basic-spec))) + (test #f "fooval" (alist-ref 'f apkt)) + (test #f "fooval" (alist-ref 'foo bpkt)) + (test #f #f (alist-ref 'f bpkt))) + +(test-end "pkts as alists") + +(test-begin "descendents and ancestors") + +(define (get-uuid pkt)(alist-ref 'uuid pkt)) + +;; add a child to 263e +(let-values (((uuid pkt) + (construct-pkt 'x "X" 'y "Y" 'P "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" + 'D "1486332719.0"))) + (add-to-queue db pkt uuid 'basic "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" 0)) + +(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8") + (map (lambda (x)(alist-ref 'uuid x)) + (get-descendents + db 0 + "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"))) + +(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8") + (map (lambda (x)(alist-ref 'uuid x)) + (get-ancestors + db 0 + "818fe30988c9673441b8f203972a8bda6af682f8"))) + +(test-end "descendents and ancestors") + +(test-end "pkts and pkt archives") + +(test-begin "pktsdb") + +(define spec '((tests (testname n TEXT) + (testpath p TEXT) + (duration d INTEGER)))) +;; (define pktsdb (make-pktdb)) +;; (pktdb-pktsdb-spec-set! pktsdb spec) + +(define pktsdb #f) + +(test #f #t (dbi:database? (let ((pdb (pktsdb-open "test.db" spec))) + (set! pktsdb pdb) + (pktdb-conn pdb)))) +;; (pp (pktdb-pktspec pktsdb)) +(test #f #t (pktsdb-add-record pktsdb 'tests '((testname . "test1")))) + +(pktsdb-close pktsdb) + +(test-end "pktsdb") Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -28,10 +28,17 @@ (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +(module client +* + +) + +(import client) + (include "common_records.scm") (include "db_records.scm") ;; client:get-signature (define (client:get-signature) @@ -44,13 +51,10 @@ #;(define (client:logout serverdat) (let ((ok (and (socket? serverdat) (cdb:logout serverdat *toppath* (client:get-signature))))) ok)) -(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) - (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) - ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline @@ -60,17 +64,27 @@ ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; -(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) +;;(define (http-transport:server-dat-make-url runremote) +(define (client:get-url runremote) + (if (and (remote-iface runremote) + (remote-port runremote)) + (conc "http://" + (remote-iface runremote) + ":" + (remote-port runremote)) + #f)) + +(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) (mutex-lock! *rmt-mutex*) - (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) + (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) (mutex-unlock! *rmt-mutex*) res)) -(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) +(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0)) (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) (server:start-and-wait areapath) (if (<= remaining-tries 0) (begin (debug:print-error 0 *default-log-port* "failed to start or connect to server") @@ -77,52 +91,72 @@ (exit 1)) ;; ;; Alternatively here, we can get the list of candidate servers and work our way ;; through them searching for a good one. ;; - (let* ((server-dat (server:choose-server areapath 'best)) - (runremote (or area-dat *runremote*))) + (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid +;; (runremote (or area-dat *runremote*))) (if (not server-dat) ;; no server found - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) + (begin + (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) (match server-dat ((host port start-time server-id pid) (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if (and (not area-dat) - (not *runremote*)) - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) + (if (not runremote) + (begin + ;; Here we are creating a runremote where there was none or it was clobbered with #f + ;; + (set! runremote (make-remote)) + (let* ((server-info (server:check-if-running areapath))) + (remote-server-info-set! runremote server-info) (if server-info (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))))) + (remote-server-url-set! runremote (server:record->url server-info)) + (remote-server-id-set! runremote (server:record->id server-info))))))) + ;; at this point we have a runremote (if (and host port server-id) - (let* ((start-res (http-transport:client-connect host port server-id)) - (ping-res (rmt:login-no-auto-client-setup start-res))) - (if (and start-res - ping-res) - (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago - (if runremote - (begin - (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) - (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) - start-res) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))) + (let* ((nada (client:connect host port server-id runremote)) + (ping-res (rmt:login-no-auto-client-setup runremote))) + (if ping-res + (if runremote + (begin + (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote)) + runremote) + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 - (case *transport-type* - ((http)(http-transport:close-connections))) - (if *runremote* - (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) - ) + (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 + (http-transport:close-connections runremote) (thread-sleep! 1) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered ;; (server:kind-run areapath) (server:start-and-wait areapath) (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))) + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))))) (else (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat))))))) +;; +;; connect - stored in remote-condat +;; +;; (define (http-transport:client-connect iface port server-id runremote) +(define (client:connect iface port server-id runremote-in) + (let* ((runremote (or runremote-in + (make-runremote)))) + (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) + (api-req (make-request method: 'POST uri: api-uri))) + ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) + (remote-iface-set! runremote iface) + (remote-port-set! runremote port) + (remote-server-id-set! runremote server-id) + (remote-connect-time-set! runremote (current-seconds)) + (remote-last-access-set! runremote (current-seconds)) + (remote-api-url-set! runremote api-url) + (remote-api-uri-set! runremote api-uri) + (remote-api-req-set! runremote api-req) + runremote))) + Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -315,26 +315,34 @@ (else "FAIL"))) (define (common:logpro-exit-code->test-status exit-code) (status-sym->string (common:logpro-exit-code->status-sym exit-code))) +;; (defstruct remote (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) (cons #f #f)))) (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) res)) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) - (server-info (if *toppath* (server:check-if-running *toppath*) #f)) + (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive - (connect-time (current-seconds)) - (conndat #f) - (transport *transport-type*) + (connect-time (current-seconds)) ;; when we first connected + (last-access (current-seconds)) ;; last time we talked to server + (conndat #f) ;; iface port api-uri api-url api-req seconds server-id (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode + (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode + + ;; conndat stuff + (iface #f) ;; TODO: Consolidate this data with server-url and server-info above + (port #f) + (api-url #f) + (api-uri #f) + (api-req #f)) ;; launching and hosts (defstruct host (reachable #f) (last-update 0) @@ -408,13 +416,22 @@ (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) + +;; From 1.70 to 1.80, db's are compatible. + (define (common:api-changed?) - (not (equal? (substring (->string megatest-version) 0 4) - (substring (conc (common:get-last-run-version)) 0 4)))) + (let* ( + (megatest-major-version (substring (->string megatest-version) 0 4)) + (run-major-version (substring (conc (common:get-last-run-version)) 0 4)) + ) + (and (not (equal? megatest-major-version "1.80")) + (not (equal? megatest-major-version megatest-run-version))) + ) +) ;;====================================================================== ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; @@ -520,11 +537,11 @@ (define (common:rotate-logs) (let* ((all-files (make-hash-table)) (stats (make-hash-table)) (inc-stat (lambda (key) (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) - (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age + (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) (handle-exceptions exn @@ -599,14 +616,15 @@ ;;====================================================================== ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; Do NOT check if not on homehost! ;; (define (common:exit-on-version-changed) - (if (common:on-homehost?) + (if (and *toppath* ;; do nothing if *toppath* not yet provided + (common:on-homehost?)) (if (common:api-changed?) (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) + (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".megatest/main.db")) (read-only (not (file-write-access? dbfile))) (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) (debug:print 0 *default-log-port* "WARNING: Version mismatch!\n" " expected: " (common:version-signature) "\n" @@ -626,14 +644,14 @@ (common:cleanup-db dbstruct))) ((not (common:file-exists? mtconf)) (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") (exit 1)) ((not (common:file-exists? dbfile)) - (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") + (debug:print 0 *default-log-port* " .megatest/main.db does not exist in this area. Cannot proceed with megatest version migration.") (exit 1)) ((not (eq? (current-user-id)(file-owner mtconf))) - (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") + (debug:print 0 *default-log-port* " You do not own .megatest/main.db in this area. Cannot proceed with megatest version migration.") (exit 1)) (read-only (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") (exit 1)) (else @@ -710,19 +728,21 @@ "")))) (define (common:alist-ref/default key alist default) (or (alist-ref key alist) default)) -(define (common:low-noise-print waitval . keys) - (let* ((key (string-intersperse (map conc keys) "-" )) - (lasttime (hash-table-ref/default *common:denoise* key 0)) - (currtime (current-seconds))) - (if (> (- currtime lasttime) waitval) - (begin - (hash-table-set! *common:denoise* key currtime) - #t) - #f))) +;; moved into commonmod +;; +;; (define (common:low-noise-print waitval . keys) +;; (let* ((key (string-intersperse (map conc keys) "-" )) +;; (lasttime (hash-table-ref/default *common:denoise* key 0)) +;; (currtime (current-seconds))) +;; (if (> (- currtime lasttime) waitval) +;; (begin +;; (hash-table-set! *common:denoise* key currtime) +;; #t) +;; #f))) (define (common:get-megatest-exe) (or (getenv "MT_MEGATEST") "megatest")) (define (common:read-encoded-string instr) @@ -946,20 +966,21 @@ (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) (exit 1)) - (let* ((tsname (common:get-testsuite-name)) + (let* ((toppath (common:real-path *toppath*)) + (tsname (common:get-testsuite-name)) (dbpath (common:get-create-writeable-dir (list (conc "/tmp/" (current-user-name) "/megatest_localdb/" tsname "/" - (string-translate *toppath* "/" ".")) + (string-translate toppath "/" ".")) (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name "/megatest_localdb/" tsname - (string-translate *toppath* "/" ".")) + (string-translate toppath "/" ".")) )))) (set! *db-cache-path* dbpath) ;; ensure megatest area has .megatest (let ((dbarea (conc *toppath* "/.megatest"))) (if (not (file-exists? dbarea)) @@ -980,12 +1001,13 @@ ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (common:run-sync?) - (and (common:on-homehost?) - (args:get-arg "-server"))) + (and *toppath* ;; gate if called before *toppath* is set + (common:on-homehost?) + (args:get-arg "-server"))) (define (common:human-time) (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) @@ -1598,10 +1620,30 @@ path) ;; just give up (with-input-from-pipe (conc "/bin/readlink -f " path) (lambda () (read-line))))) + +;; for reasons I don't understand multiple calls to real-path in parallel threads +;; must be protected by mutexes +;; +(define (common:real-path inpath) + ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) + ;; (let-values + ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) + ;; (with-input-from-port inp + ;; (let loop ((inl (read-line)) + ;; (res #f)) + ;; (print "inl=" inl) + ;; (if (eof-object? inl) + ;; (begin + ;; (close-input-port inp) + ;; (close-output-port oup) + ;; ;; (process-wait pid) + ;; res) + ;; (loop (read-line) inl)))))) + (with-input-from-pipe (conc "readlink -f " inpath) read-line)) ;;====================================================================== ;; returns *effective load* (not normalized) ;; (define (common:get-intercept onemin fivemin) @@ -1989,10 +2031,17 @@ (host-last-used-set! rec curr-time) new-best) (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) (define (common:wait-for-homehost-load maxnormload msg) + (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test... + (if (not *toppath*) + (begin + (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.") + (thread-sleep! 30) + (if (< (- (current-seconds) start-time) 300) + (loop start-time))))) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f (server:choose-server *toppath* 'homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) @@ -2218,30 +2267,10 @@ (uname #f)) (if (null? (car uname-res)) "unknown" (caar uname-res)))) -;; for reasons I don't understand multiple calls to real-path in parallel threads -;; must be protected by mutexes -;; -(define (common:real-path inpath) - ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) - ;; (let-values - ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) - ;; (with-input-from-port inp - ;; (let loop ((inl (read-line)) - ;; (res #f)) - ;; (print "inl=" inl) - ;; (if (eof-object? inl) - ;; (begin - ;; (close-input-port inp) - ;; (close-output-port oup) - ;; ;; (process-wait pid) - ;; res) - ;; (loop (read-line) inl)))))) - (with-input-from-pipe (conc "readlink -f " inpath) read-line)) - ;;====================================================================== ;; D I S K S P A C E ;;====================================================================== (define (common:get-disk-space-used fpath) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -359,13 +359,15 @@ (define (db:cache-for-read-only source target #!key (use-last-update #f)) (if (and (hash-table-ref/default *global-db-store* target #f) (>= (file-modification-time target)(file-modification-time source))) (hash-table-ref *global-db-store* target) (let* ((toppath (launch:setup)) - (targ-db-last-mod (if (common:file-exists? target) - (file-modification-time target) - 0)) + (targ-db-last-mod (db:get-sqlite3-mod-time target)) +;; (if (common:file-exists? target) +;; BUG: This needs to include wal mode stuff .shm etc. +;; (file-modification-time target) +;; 0)) (cache-db (or (hash-table-ref/default *global-db-store* target #f) (db:open-megatest-db path: target))) (source-db (db:open-megatest-db path: source)) (curr-time (current-seconds)) (res '()) @@ -405,35 +407,54 @@ ;; use-last-update: #t))) ;; (thread-start! th1) ;; (apply proc cache-db params) ;; )))) - - - +(define (db:get-sqlite3-mod-time fname) + (let* ((wal-file (conc fname "-wal")) + (shm-file (conc fname "-shm")) + (get-mtime (lambda (f) + (if (and (file-exists? f) + (file-read-access? f)) + (file-modification-time f) + 0)))) + (max (get-mtime fname) + (get-mtime wal-file) + (get-mtime shm-file)))) + (define (db:all-db-sync dbstruct) (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) (data-synced 0) ;; count of changed records (tmp-area (common:get-db-tmp-area)) (dbfiles (glob (conc tmp-area"/.megatest/*.db"))) (sync-durations (make-hash-table)) (no-sync-db (db:open-no-sync-db))) (for-each - (lambda (file) + (lambda (file) ;; tmp db file (debug:print-info 3 *default-log-port* "file: " file) - (let* ((fname (conc (pathname-file file) ".db")) - (fulln (conc *toppath*"/.megatest/"fname)) - (time1 (if (file-exists? file) - (file-modification-time file) - (begin - (debug:print-info 2 *default-log-port* "Sync - I do not see file "file) - 1))) - (time2 (if (file-exists? fulln) - (file-modification-time fulln) - (begin - (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln) - 0))) + (let* ((fname (conc (pathname-file file) ".db")) ;; fname is tmp db file + (wal-file (conc fname "-wal")) + (shm-file (conc fname "-shm")) + (fulln (conc *toppath*"/.megatest/"fname)) ;; fulln is nfs db name + (wal-time (if (file-exists? wal-file) + (file-modification-time wal-file) + 0)) + (shm-time (if (file-exists? shm-file) + (file-modification-time shm-file) + 0)) + (time1 (db:get-sqlite3-mod-time file)) +;; (if (file-exists? file) ;; time1 is the max itime of the tmp db, -wal and -shm files. +;; (max (file-modification-time file) wal-time shm-time) +;; (begin +;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "file) +;; 1))) + (time2 (db:get-sqlite3-mod-time fulln)) +;; (if (file-exists? fulln) ;; time2 is nfs file time +;; (file-modification-time fulln) +;; (begin +;; (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln) +;; 0))) (changed (> (- time1 time2) (+ (random 5) 1))) ;; it has been at some few seconds since last synced (changed10 (> (- time1 time2) 10)) ;; it has been at least ten seconds since sync'd (jfile-exists (file-exists? (conc file"-journal"))) ;; i.e. are we busy? (do-cp (cond ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover @@ -484,12 +505,11 @@ (dest-area (if old2new tmp-area *toppath*)) (dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db")))) (keys (db:get-keys dbstruct)) (sync-durations (make-hash-table))) - - (if killservers + (if (and killservers servers) (begin (for-each (lambda (server) (handle-exceptions exn @@ -501,10 +521,13 @@ (tasks:kill-server host pid))))) servers) (delete-file* (common:get-sync-lock-filepath)) ) ) + + (if (not dbfiles) + (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.megatest")) (for-each (lambda (srcfile) (debug:print-info 3 *default-log-port* "file: " srcfile) (let* ((fname (conc (pathname-file srcfile) ".db")) (basename (pathname-file srcfile)) @@ -525,10 +548,11 @@ (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds (do-cp (cond ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile) + ;; TODO: Need to fix this for WAL mod. Can't just copy. (system (conc "/bin/mkdir -p " dest-directory)) (system (conc "/bin/cp " srcfile " " destfile)) #t) (changed ;; (and changed ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed. @@ -562,14 +586,17 @@ (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date") ) ) ) dbfiles + ) ) data-synced ) ) + + ;; Sync all changed db's ;; (define (db:tmp->megatest.db-sync dbstruct run-id last-update) (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))) @@ -2029,18 +2056,19 @@ (lambda (dbdat db) (if msg (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) +(define (db:set-run-state-status-db db run-id state status ) + (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)) + (define (db:set-run-state-status dbstruct run-id state status ) (db:with-db dbstruct #f #f (lambda (dbdat db) - (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) - - - + (db:set-run-state-status-db db run-id state status)))) + (define (db:get-run-status dbstruct run-id) (let ((res "n/a")) (db:with-db dbstruct #f #f (lambda (dbdat db) @@ -2306,12 +2334,12 @@ (sqlite3:for-each-row (lambda (run-id testname item-path state status) ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) db - "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" - test-id))) + "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" + test-id run-id))) res)) ;; get a useful subset of the tests data (used in dashboard ;; use db:mintest-get-{id ,run_id,testname ...} ;; @@ -2402,25 +2430,28 @@ ;; NOTE: run-id is not used ;; ;; (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) (db:with-db dbstruct - run-id - #t + run-id #f (lambda (dbdat db) - (cond - ((and newstate newstatus newcomment) - (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) - test-id)) - ((and newstate newstatus) - (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) - (else - (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) - (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) - (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) - test-id)))))) - (mt:process-triggers dbstruct run-id test-id newstate newstatus)) + (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment)))) + +(define (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment) + (cond + ((and newstate newstatus newcomment) + (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) + test-id)) + ((and newstate newstatus) + (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) + (else + (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) + (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) + (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) + test-id)))) + ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function + ) ;; NEW BEHAVIOR: Count tests running in all runs! ;; (define (db:get-count-tests-running dbstruct run-id) ;; fastmode) (let* ((qry ;; (if fastmode @@ -2715,18 +2746,21 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (apply vector a b))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") - test-name item-path run-id) - res)))) + (db:get-test-info-db db run-id test-name item-path)))) + +(define (db:get-test-info-db db run-id test-name item-path) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (apply vector a b))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") + test-name item-path run-id) + res)) (define (db:test-get-rundir-from-test-id dbstruct run-id test-id) (db:with-db dbstruct run-id @@ -3177,11 +3211,13 @@ test-name)) (item-path (db:test-get-item-path testdat)) (tl-testdat (db:get-test-info dbstruct run-id test-name "")) (tl-test-id (if tl-testdat (db:test-get-id tl-testdat) - #f))) + #f)) + (new-state-eh #f) + (new-status-eh #f)) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (db:general-call dbstruct run-id 'set-test-start-time (list test-id))) (mutex-lock! *db-transaction-mutex*) (db:with-db dbstruct run-id #f @@ -3189,29 +3225,32 @@ (let ((tr-res (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status + (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test - (state-statuses (db:roll-up-rules state-status-counts state status)) - (newstate (car state-statuses)) - (newstatus (cadr state-statuses))) + (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (state-statuses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-statuses)) + (newstatus (cadr state-statuses))) + (set! new-state-eh newstate) + (set! new-status-eh newstatus) (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " - (apply conc - (map (lambda (x) - (conc - (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) - state-status-counts))); end debug:print - - (if tl-test-id - (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + (if tl-test-id + (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct )))))) (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) + (if new-state-eh ;; moved from db:test-set-state-status + (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh)) tr-res))))) (define (db:roll-up-rules state-status-counts state status) (let* ((running (length (filter (lambda (x) (member (dbr:counts-state x) *common:running-states*)) @@ -3275,56 +3314,53 @@ (lambda (dbdat db) (let ((tr-res (sqlite3:with-transaction db (lambda () - (let* ((state-status-counts (db:get-all-state-status-counts-for-run db run-id)) + (let* ((state-status-counts (db:get-all-state-status-counts-for-run-db db run-id)) (state-statuses (db:roll-up-rules state-status-counts #f #f )) (newstate (car state-statuses)) (newstatus (cadr state-statuses))) (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) - (db:set-run-state-status db run-id newstate newstatus ))))))) + (db:set-run-state-status-db db run-id newstate newstatus ))))))) (mutex-unlock! *db-transaction-mutex*) tr-res)))) + +(define (db:get-all-state-status-counts-for-run-db db run-id) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" + run-id )) (define (db:get-all-state-status-counts-for-run dbstruct run-id) - (let* ((test-count-recs (db:with-db - dbstruct #f #f - (lambda (dbdat db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" - run-id ))))) - test-count-recs)) - + (db:with-db + dbstruct #f #f + (lambda (dbdat db) + (db:get-all-state-status-counts-for-run-db dbstruct run-id)))) ;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* ;; ;; NOTE: This is called within a transaction ;; -(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) - (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) +(define (db:get-all-state-status-counts-for-test db run-id test-name item-path item-state-in item-status-in) + (let* ((test-info (db:get-test-info-db db run-id test-name item-path)) (item-state (or item-state-in (db:test-get-state test-info))) (item-status (or item-status-in (db:test-get-status test-info))) - (other-items-count-recs (db:with-db - dbstruct run-id #f - (lambda (dbdat db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - ;; ignore current item because we have changed its value in the current transation so this select will see the old value. - "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" - run-id test-name item-path)))) - + (other-items-count-recs (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + ;; ignore current item because we have changed its value in the current transation so this select will see the old value. + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + run-id test-name item-path)) ;; add current item to tally outside of sql query - (match-countrec-lambda (lambda (countrec) - (and (equal? (dbr:counts-state countrec) item-state) + (match-countrec-lambda (lambda (countrec) + (and (equal? (dbr:counts-state countrec) item-state) (equal? (dbr:counts-status countrec) item-status)))) - + (already-have-count-rec-list (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status (updated-count-rec (if (null? already-have-count-rec-list) (make-dbr:counts state: item-state status: item-status count: 1) @@ -3334,11 +3370,10 @@ (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) (unrelated-rec-list (filter nonmatch-countrec-lambda other-items-count-recs))) - (cons updated-count-rec unrelated-rec-list))) ;; (define (db:get-all-item-states db run-id test-name) ;; (sqlite3:map-row ;; (lambda (a) a) @@ -4616,11 +4651,12 @@ (set! *task-db* #f))))) (if (and (not (args:get-arg "-server")) *runremote*) (begin (debug:print-info 0 *default-log-port* "Closing all client connections...") - (http-client#close-all-connections!))) + (http-transport:close-connections *runremote*) + #;(http-client#close-all-connections!))) ;; (if (and *runremote* ;; (remote-conndat *runremote*)) ;; (begin ;; (http-client#close-all-connections!))) ;; for http-client (if (not (eq? *default-log-port* (current-error-port))) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -328,19 +328,19 @@ (current-error-port) (lambda () (apply print params)))) (define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500)) - (let* ((busy-file (conc fname"-journal")) + (let* ((busy-file (conc fname "-journal")) (delay-time (* (- 51 tries-left) 1.1)) (write-access (file-write-access? fname)) (dir-access (file-write-access? (pathname-directory fname))) (retry (lambda () (thread-sleep! delay-time) (if (> tries-left 0) (dbfile:cautious-open-database fname init-proc - sync-mode: sync-mode journal-mode: journal-mode + sync-mode journal-mode (- tries-left 1)))))) (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) (if (and (file-write-access? fname) (file-exists? busy-file)) @@ -351,11 +351,11 @@ (thread-sleep! 1) (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: forcing journal rollup "busy-file) (dbfile:brute-force-salvage-db fname))) - (dbfile:cautious-open-database fname init-proc sync-mode: sync-mode journal-mode: journal-mode (- tries-left 1))) + (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1))) (let* ((result (condition-case (if dir-access (dbfile:with-simple-file-lock (conc fname ".lock") @@ -513,11 +513,12 @@ (db:sync-touched dbstruct runid keys dbinit) (set! *db-sync-in-progress* #f) (delete-file* lock-file) #t) (begin - (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.") + (if (common:low-noise-print 120 (conc "no lock "from-db-file)) + (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.")) #f )))) ;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f ;; ;; @@ -658,11 +659,26 @@ '("reviewed" #f) '("iterated" #f) '("avg_runtime" #f) '("avg_disk" #f) '("tags" #f) - '("jobgroup" #f))))) + '("jobgroup" #f)) + + + (list "tasks_queue" + '("id" #f) + '("action" #f) + '("owner" #f) + '("state" #f) + '("target" #f) + '("name" #f) + '("testpatt" #f) + '("keylock" #f) + '("params" #f) + '("creation_time" #f) + '("execution_time" #f)) + ))) (define (db:sync-all-tables-list dbstruct keys) (append (db:sync-main-list dbstruct keys) db:sync-tests-only)) @@ -992,10 +1008,12 @@ ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) + (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) + (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption (have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) #f)) @@ -1008,10 +1026,11 @@ (jfile (conc fname"-journal")) #;(subdb (if have-struct (dbfile:get-subdb dbstruct run-id) #f)) ) ;; was 25 + (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db", fname="fname) (if (file-exists? jfile) (begin (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") (thread-sleep! 0.2))) (if (and use-mutex Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -27,10 +27,13 @@ (declare (unit dcommon)) (declare (uses gutils)) (declare (uses db)) +(declare (uses commonmod)) + +(import commonmod) ;; (declare (uses synchash)) (include "megatest-version.scm") (include "common_records.scm") (include "db_records.scm") @@ -635,11 +638,12 @@ (common:max (map cadr col-indices)))) (max-visible (max (- (dboard:tabdat-num-tests tabdat) 15) 3)) (max-col-vis (if (> max-col 10) 10 max-col)) (numrows 1) (numcols 1)) - (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS") + (if (common:low-noise-print 60 "runs-stats-update-clear") + (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS")) (iup:attribute-set! stats-matrix "NUMCOL" max-col ) (iup:attribute-set! stats-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20 (iup:attribute-set! stats-matrix "NUMCOL_VISIBLE" max-col-vis) (iup:attribute-set! stats-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row)) ;;(print "row-indices: " row-indices " col-indices: " col-indices) ADDED docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf Index: docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf ================================================================== --- /dev/null +++ docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf cannot compute difference between binary files ADDED docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf Index: docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf ================================================================== --- /dev/null +++ docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf cannot compute difference between binary files ADDED docs/reference/queues-dont-fix-overload.pdf Index: docs/reference/queues-dont-fix-overload.pdf ================================================================== --- /dev/null +++ docs/reference/queues-dont-fix-overload.pdf cannot compute difference between binary files Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -153,11 +153,11 @@ (determine-proxy (constantly #f))) (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) (handle-exceptions exn (begin - (print-error-message exn) + ;; (print-error-message exn) (if (< portnum 64000) (begin (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 5 *default-log-port* "exn=" (condition->list exn)) @@ -241,25 +241,17 @@ (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) ;; Send "cmd" with json payload "params" to serverdat and receive result ;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) - (let* ((fullurl (if (vector? serverdat) - (http-transport:server-dat-get-api-req serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1)))) +(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3)) + (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat) + (let* ((fullurl (remote-api-req runremote)) (res (vector #f "uninitialized")) (success #t) (sparams (db:obj->string params transport: 'http)) - (runremote (or area-dat *runremote*)) - (server-id (if (vector? serverdat) - (http-transport:server-dat-get-server-id serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1))))) + (server-id (remote-server-id runremote))) (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) ;; set up the http-client here (max-retry-attempts 1) ;; consider all requests indempotent @@ -286,21 +278,13 @@ (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) ;; what if another thread is communicating ok? Can't happen due to mutex - (set! *runremote* #f) - (set! runremote #f) - ;; (if runremote - ;; (remote-conndat-set! runremote #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? + (http-transport:close-connections runremote) (mutex-unlock! *http-mutex*) - ;; (signal (make-composite-condition - ;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;; "communications failed" - (close-all-connections!) + ;; (close-connection! fullurl) (db:obj->string #f)) (with-input-from-request ;; was dat fullurl (list (cons 'key (or server-id "thekey")) (cons 'cmd cmd) @@ -345,67 +329,24 @@ 'timeout 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) ;; careful closing of connections stored in *runremote* ;; -(define (http-transport:close-connections #!key (area-dat #f)) - (let* ((runremote (or area-dat *runremote*)) - (server-dat (if runremote - (remote-conndat runremote) - #f))) ;; (hash-table-ref/default *runremote* run-id #f))) - (if (vector? server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions +(define (http-transport:close-connections runremote) + (if (remote? runremote) + (let ((api-dat (remote-api-uri runremote))) + (handle-exceptions exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (close-connection! api-dat) - (close-idle-connections!) - #t)) - #f))) - - -(define (make-http-transport:server-dat)(make-vector 6)) -(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) -(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) -(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) -(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) -(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) -(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) -;(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) -(define (http-transport:server-dat-get-server-id vec) (vector-ref vec 6)) - -(define (http-transport:server-dat-make-url vec) - (if (and (http-transport:server-dat-get-iface vec) - (http-transport:server-dat-get-port vec)) - (conc "http://" - (http-transport:server-dat-get-iface vec) - ":" - (http-transport:server-dat-get-port vec)) + (begin + (print-call-chain *default-log-port*) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (if (args:any-defined? "-server" "-execute" "-run") + (debug:print-info 0 *default-log-port* "Closing connections to "api-dat)) + (if api-dat (close-connection! api-dat)) + (remote-conndat-set! runremote #f) + #t)) #f)) - -(define (http-transport:server-dat-update-last-access vec) - (if (vector? vec) - (vector-set! vec 5 (current-seconds)) - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) - -;; -;; connect -;; -(define (http-transport:client-connect iface port server-id) - (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) - server-dat)) - - - ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running) @@ -429,11 +370,11 @@ (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (and sdat (not changed) (> (- (current-seconds) start-time) 2)) - (let* ((servinfodir (conc *toppath*"/.servinfo")) + (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo")) (ipaddr (car sdat)) (port (cadr sdat)) (servinf (conc servinfodir"/"ipaddr":"port))) (set! servinfofile servinf) (if (not (file-exists? servinfodir)) @@ -448,34 +389,23 @@ (lambda () (delete-file* servinf)) *on-exit-procs*)) ;; put data about this server into a simple flat file host.port (debug:print-info 0 *default-log-port* "Received server alive signature") - #;(common:save-pkt `((action . alive) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat))) - *configdat* #t) sdat) (begin (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (let* ((ipaddr (car sdat)) + (if sdat + (let* ((ipaddr (car sdat)) (port (cadr sdat)) - (servinf (conc *toppath*"/.servinfo/"ipaddr":"port))) - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - ;; (delete-file* servinf) ;; handled by on-exit, can be removed - #;(common:save-pkt `((action . died) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat)) - (msg . "Transport died?")) - *configdat* #t) + (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port))) + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") (exit)) + (exit) + ) (loop start-time (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) (port (cadr server-info)) @@ -504,11 +434,12 @@ (if (and no-sync-db (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :) (begin (if (common:low-noise-print 120 "sync-all-print") (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))) - (db:all-db-sync *dbstruct-dbs*)))) + (db:all-db-sync *dbstruct-dbs*) + ))) ;; when things go wrong we don't want to be doing the various queries too often ;; so we strive to run this stuff only every four seconds or so. (let* ((sync-time (- (current-milliseconds) start-time)) (rem-time (quotient (- 4000 sync-time) 1000))) @@ -560,22 +491,22 @@ (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn) - (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter + (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter (not *server-overloaded*) (file-exists? servinfofile)) (change-file-times servinfofile curr-time curr-time))) - (if (or (common:low-noise-print 120 "start new server") + (if (and (common:low-noise-print 120 "start new server") (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another (begin - (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...") + (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...") (server:kind-run *toppath*) (if (> *api-process-request-count* 100) (begin - (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile) + (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile) (delete-file* servinfofile))))))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) @@ -621,59 +552,36 @@ ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch) - ;; check that a server start is in progress, pause or exit if so - (let* ((tmp-area (common:get-db-tmp-area)) - (server-start (conc tmp-area "/.server-start")) - (server-started (conc tmp-area "/.server-started")) - (start-time (common:lazy-modification-time server-start)) - (started-time (common:lazy-modification-time server-started)) - (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting - (start-time-old (> (- (current-seconds) start-time) 5)) - (cleanup-proc (lambda (msg) - (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) - (full-serv-fname (conc *toppath* "/logs/" serv-fname)) - (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) - (debug:print 0 *default-log-port* msg) - (if (common:file-exists? full-serv-fname) - (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) - (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) - (exit))))) - #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago - (not server-starting)) - (begin - (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") - (exit))) - ;; lets not even bother to start if there are already three or more server files ready to go - #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) - (if (> num-alive 3) - (begin - (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) - (exit)))) - #;(common:save-pkt `((action . start) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))) + ;; check the .servinfo directory, are there other servers running on this + ;; or another host? + (let* ((server-start-is-ok (server:minimal-check *toppath*))) + (if (not server-start-is-ok) + (begin + (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.") + (exit 1)))) + + ;; check that a server start is in progress, pause or exit if so + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running) + "Keep running")))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit))) ;; (define (http-transport:server-signal-handler signum) ;; (signal-mask! signum) ;; (handle-exceptions ;; exn Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.7009) +(define megatest-version 1.8006) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -654,23 +654,10 @@ ;; for some switches always print the command to stderr ;; (if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) -;; some switches imply homehost. Exit here if not on homehost -;; -(let ((homehost-required (list "-cleanup-db"))) - (if (apply args:any? homehost-required) - (if (not (server:choose-server *toppath* 'home?)) - (for-each - (lambda (switch) - (if (args:get-arg switch) - (begin - (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch - ", you can move homehost by removing the .homehost file but this will disrupt any runs in progress.") - (exit 1)))) - homehost-required)))) ;;====================================================================== ;; Misc setup stuff ;;====================================================================== @@ -934,13 +921,13 @@ ;;====================================================================== ;; Server? Start up here. ;; (if (args:get-arg "-server") - (let ((tl (launch:setup)) - (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) - (server:launch 0 transport-type) + (let ((tl (launch:setup))) + ;; (server:launch 0 'http) + (http-transport:launch) (set! *didsomething* #t))) ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. ;; @@ -2321,10 +2308,14 @@ (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) + (if (not (server:choose-server *toppath* 'home?)) + (begin + (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db") + (exit 1))) (let ((dbstructs (db:setup #f))) (common:cleanup-db dbstructs)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") ADDED mtargs/mtargs.egg Index: mtargs/mtargs.egg ================================================================== --- /dev/null +++ mtargs/mtargs.egg @@ -0,0 +1,7 @@ +((license "LGPL") + (version 0.1) + (category misc) + (dependencies srfi-69 srfi-1) + (author "Matt Welland") + (synopsis "Primitive argument processor.") + (components (extension mtargs))) Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -19,22 +19,28 @@ (module mtargs ( arg-hash get-arg get-arg-from - usage get-args + usage print-args any-defined? - help ) -(import scheme chicken data-structures extras posix ports files) -(use srfi-69 srfi-1) +(import scheme) ;; gives us cond-expand in chicken-4 + +(cond-expand + (chicken-5 + (import scheme (chicken base) (chicken port) (chicken file) (chicken process-context)) + (import srfi-69 srfi-1)) + (chicken-4 + (import chicken posix srfi-69 srfi-1)) + (else)) +(define usage (make-parameter print)) (define arg-hash (make-hash-table)) -(define help "") (define (get-arg arg . default) (if (null? default) (hash-table-ref/default arg-hash arg #f) (hash-table-ref/default arg-hash arg (car default)))) @@ -48,28 +54,10 @@ (define (get-arg-from ht arg . default) (if (null? default) (hash-table-ref/default ht arg #f) (hash-table-ref/default ht arg (car default)))) -(define (usage . args) - (if (> (length args) 0) - (apply print "ERROR: " args)) - (if (string? help) - (print help) - (print "Usage: " (car (argv)) " ... ")) - (exit 0)) - - ;; one-of args defined -(define (any-defined? . param) - (let ((res #f)) - (for-each - (lambda (arg) - (if (get-arg arg)(set! res #t))) - param) - res)) - -;; args: (define (get-args args params switches arg-hash num-needed) (let* ((numtargs (length args)) (adj-num-needed (if num-needed (+ num-needed 2) #f))) (if (< numtargs (if adj-num-needed adj-num-needed 2)) (if (>= num-needed 1) @@ -94,13 +82,12 @@ (else (if (null? tail)(append remtargs (list arg)) ;; return the non-used args (loop (car tail)(cdr tail)(append remtargs (list arg)))))))) )) -(define (print-args remtargs arg-hash) - (print "ARGS: " remtargs) +(define (print-args arg-hash) (for-each (lambda (arg) (print " " arg " " (hash-table-ref/default arg-hash arg #f))) (hash-table-keys arg-hash))) ) Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -64,11 +64,11 @@ exn (begin ;; (release-dot-lock fname) (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + ;; (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) (db (portlogger:open-db fname)) (res (apply proc db params))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -42,19 +42,18 @@ ;;====================================================================== ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; -(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. - (let* ((runremote (or area-dat *runremote*)) - (cinfo (if (remote? runremote) +(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down. + (let* ((cinfo (if (remote? runremote) (remote-conndat runremote) #f))) (if cinfo cinfo (if (server:check-if-running areapath) - (client:setup areapath) + (client:setup areapath runremote) #f)))) (define (rmt:on-homehost? runremote) (let* ((hh-dat (remote-hh-dat runremote))) (if (pair? hh-dat) @@ -172,15 +171,16 @@ ;;DOT CASE4 -> "rmt:send-receive"; ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on - (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) + (+ (remote-last-access runremote) (remote-server-timeout runremote)))) - (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") - (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. - (http-transport:close-connections area-dat: runremote) + (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses in " (remote-server-timeout runremote) " seconds, forcing new connection.") + (http-transport:close-connections runremote) + ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections + ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;;DOT CASE5 [label="local\nread"]; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; @@ -198,27 +198,27 @@ ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; ;;DOT CASE6 -> "rmt:send-receive"; ;; on homehost and this is a write, we already have a server, but server has died ;; reinstate this keep-alive section but inject a time condition into the (add ... - - #;((and (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write - (remote-server-url runremote) ;; have a server - (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") - (http-transport:close-connections area-dat: runremote) ;; make sure to clean up - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; + ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost + ;; (not (member cmd api:read-only-queries)) ;; this is a write + ;; (remote-server-url runremote) ;; have a server + ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. + ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") + ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up + ;; (set! *runremote* (make-remote)) + ;; (let* ((server-info (remote-server-info *runremote*))) + ;; (if server-info + ;; (begin + ;; (remote-server-url-set! *runremote* (server:record->url server-info)) + ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) + ;; (remote-force-server-set! runremote (common:force-server?)) + ;; (mutex-unlock! *rmt-mutex*) + ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") + ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;;DOT CASE7 [label="homehost\nwrite"]; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server @@ -261,11 +261,11 @@ (not (remote-conndat runremote)))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) (mutex-unlock! *rmt-mutex*) (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? (server:start-and-wait *toppath*)) - (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http + (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;;DOT CASE10 [label="on homehost"]; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; @@ -282,69 +282,43 @@ ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; not on homehost, do server query (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) ;;DOT } -;; No Title -;; Error: (vector-ref) out of range -;; #(# (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299))) -;; 6 -;; -;; Call history: -;; -;; http-transport.scm:306: thread-terminate! -;; http-transport.scm:307: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:259: k587 -;; rmt.scm:259: g591 -;; rmt.scm:276: http-transport:server-dat-update-last-access -;; http-transport.scm:364: current-seconds -;; rmt.scm:282: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:283: mutex-unlock! -;; rmt.scm:287: extras-transport-succeded <-- -;; +-----------------------------------------------------------------------------+ -;; | Exit Status : 70 -;; - ;; bunch of small functions factored out of send-receive to make debug easier ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) ;; (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") ;; (mutex-lock! *rmt-mutex*) (let* ((conninfo (remote-conndat runremote)) - (dat-in (case (remote-transport runremote) - ((http) (condition-case ;; handling here has - ;; caused a lot of - ;; problems. However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:client-api-send-receive 0 conninfo cmd params) - ((servermismatch) (vector #f "Server id mismatch" )) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (else - (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") - (exit)))) + (dat-in (condition-case ;; handling here has + ;; caused a lot of + ;; problems. However it + ;; is needed to deal with + ;; attemtped + ;; communication to + ;; servers that have gone + ;; away + (http-transport:client-api-send-receive 0 runremote cmd params) + ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote) + ((servermismatch) (vector #f "Server id mismatch" )) + ((commfail)(vector #f "communications fail")) + ((exn)(vector #f "other fail" (print-call-chain))))) (dat (if (and (vector? dat-in) ;; ... check it is a correct size (> (vector-length dat-in) 1)) dat-in (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time + (remote-last-access-set! runremote (current-seconds)) ;; refresh access time (begin (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) (set! conninfo #f) - (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. - (http-transport:close-connections area-dat: runremote))) + (http-transport:close-connections runremote))) (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) (mutex-unlock! *rmt-mutex*) (if success ;; success only tells us that the transport was ;; successful, have to examine the data to see if ;; there was a detected issue at the other end @@ -432,18 +406,13 @@ (mutex-lock! *db-multi-sync-mutex*) / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) -(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) +(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params) (let* ((run-id (if run-id run-id 0)) - (res ;; (handle-exceptions - ;; exn - ;; (begin - ;; (print "transport failed. exn=" exn) - ;; #f) - (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; ) + (res (http-transport:client-api-send-receive run-id runremote cmd params))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) ;;====================================================================== @@ -470,15 +439,12 @@ (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; Deprecated for nmsg-transport. ;; -(define (rmt:login-no-auto-client-setup connection-info) - (case *transport-type* ;; run-id of 0 is just a placeholder - ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature)))) - ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))) - )) +(define (rmt:login-no-auto-client-setup runremote) + (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature)))) ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) @@ -1067,12 +1033,11 @@ #f) (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (mutex-lock! *rmt-mutex*) - (remote-conndat-set! runremote #f) - (http-transport:close-connections area-dat: runremote) + (http-transport:close-connections runremote) (remote-server-url-set! runremote #f) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) @@ -1096,11 +1061,11 @@ ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") (mutex-lock! *rmt-mutex*) - (http-transport:close-connections area-dat: runremote) + (http-transport:close-connections runremote) (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1279,15 +1279,23 @@ (list hed tal reg reruns)) ;; If no resources are available just kill time and loop again ;; ((not have-resources) ;; simply try again after waiting a second - (if (runs:lownoise "no resources" 60) + (if (runs:lownoise "no resources" 600) (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) + ;; Have gone back and forth on this but db starvation is an issue. ;; wait one second before looking again to run jobs. - (thread-sleep! 0.25) + ;; (thread-sleep! 0.25) + + ;; new logic. + ;; If it has been more than 10 seconds since we were last here don't wait at all + ;; otherwise sleep 2 seconds to give db a rest and let dashboard read data + (if (runs:lownoise "frequent-no-resources" 10) + (thread-sleep! 0.25) ;; no significant delay + (thread-sleep! 2)) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (list (car newtal)(cdr newtal) reg reruns)) ;; This is the final stage, everything is in place so launch the test ;; @@ -1775,11 +1783,11 @@ (last-jobs-check-time (runs:dat-last-jobs-check-time runsdat)) (should-check-jobs (match can-run-more-tests ((can-run-more-flag num-running nr-in-jobgroup max-concurrent-jobs . params) (if (< (- max-concurrent-jobs num-running) 25) (begin - (debug:print-info 0 *default-log-port* + (debug:print-info 2 *default-log-port* "less than 20 jobs headroom, ("max-concurrent-jobs "-"num-running")>20. Forcing prelaunch check.") #t) #f)) (else #f)))) ;; no record yet Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -65,21 +65,10 @@ ;;====================================================================== ;; Call this to start the actual server ;; -;; all routes though here end in exit ... -;; -;; start_server -;; -(define (server:launch run-id transport-type) - (case transport-type - ((http)(http-transport:launch)) - ;;((nmsg)(nmsg-transport:launch run-id)) - ;;((rpc) (rpc-transport:launch run-id)) - (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) - ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;; Get the transport @@ -133,18 +122,11 @@ ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* (;; (curr-host (get-host-name)) - ;; (attempt-in-progress (server:start-attempted? areapath)) - ;; (dot-server-url (server:check-if-running areapath)) - ;; (curr-ip (server:get-best-guess-address curr-host)) - ;; (curr-pid (current-process-id)) - ;; (homehost (server:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - ;; (target-host (car homehost)) - (testsuite (common:get-testsuite-name)) + (let* ((testsuite (common:get-testsuite-name)) (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") "")) (cmdln (conc (common:get-megatest-exe) " -server - ";; (or target-host "-") @@ -190,46 +172,48 @@ (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f))) (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) - bad-dat) ;; no idea what went wrong, call it a bad server - (with-input-from-file - logf - (lambda () - (let loop ((inl (read-line)) - (lnum 0)) - (if (not (eof-object? inl)) - (let ((mlst (string-match server-rx inl)) - (dbprep (string-match dbprep-rx inl))) - (if dbprep (set! dbprep-found 1)) - (if (not mlst) - (if (< lnum 500) ;; give up if more than 500 lines of server log read - (loop (read-line)(+ lnum 1)) - (begin + exn + (begin + ;; WARNING: this is potentially dangerous to blanket ignore the errors + (if (file-exists? logf) + (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) + bad-dat) ;; no idea what went wrong, call it a bad server + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl))) + (if dbprep (set! dbprep-found 1)) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (begin (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) bad-dat)) - (match mlst - ((_ host port start server-id pid) - (list host - (string->number port) - (string->number start) - server-id - (string->number pid))) - (else - (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) - bad-dat)))) - (begin - (if dbprep-found - (begin - (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) - (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) - bad-dat)))))))) + (match mlst + ((_ host port start server-id pid) + (list host + (string->number port) + (string->number start) + server-id + (string->number pid))) + (else + (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) + bad-dat)))) + (begin + (if dbprep-found + (begin + (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) + (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) + bad-dat)))))))) ;; ;; get a list of servers from the log files, with all relevant data ;; ;; ( mod-time host port start-time pid ) ;; ;; ;; (define (server:get-list areapath #!key (limit #f)) @@ -419,11 +403,12 @@ ;; oldest server alive determines host then choose random of youngest ;; five servers on that host ;; (define (server:get-servers-info areapath) - (let* ((servinfodir (conc *toppath*"/.servinfo"))) + ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") + (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) (if (not (file-exists? servinfodir)) (create-directory servinfodir)) (let* ((allfiles (glob (conc servinfodir"/*"))) (res (make-hash-table))) (for-each @@ -432,15 +417,45 @@ (serverdat (server:logf-get-start-info f))) (match serverdat ((host port start server-id pid) (if (and host port start server-id pid) (hash-table-set! res hostport serverdat) - (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat))) + (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) (else - (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat))))) + (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) allfiles) res))) + +;; check the .servinfo directory, are there other servers running on this +;; or another host? +;; +;; returns #t => ok to start another server +;; #f => not ok to start another server +;; +(define (server:minimal-check areapath) + (server:clean-up-old areapath) + (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) + (servrs (glob (conc srvdir"/*"))) + (thishostip (server:get-best-guess-address (get-host-name))) + (thisservrs (glob (conc srvdir"/"thishostip":*"))) + (homehostinf (server:choose-server areapath 'homehost)) + (havehome (car homehostinf)) + (wearehome (cdr homehostinf))) + (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome + ", numservers: "(length thisservrs)) + (cond + ((not havehome) #t) ;; no homehost yet, go for it + ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another + ((and havehome (not wearehome)) #f) ;; we are not the home host + ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running + (else + (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) + #t)))) + + +(define server-last-start 0) + ;; oldest server alive determines host then choose random of youngest ;; five servers on that host ;; ;; mode: @@ -453,29 +468,46 @@ ;; 1. sort by age ascending and ping until good ;; find alive rand from youngest ;; 1. sort by age descending ;; 2. take five ;; 3. check alive, discard if not and repeat + ;; first we clean up old server files + (server:clean-up-old areapath) + (let* ((since-last (- (current-seconds) server-last-start)) + (server-start-delay 10)) + (if ( < (- (current-seconds) server-last-start) 10 ) + (begin + (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) + (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") + (thread-sleep! server-start-delay) + ) + (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) + ) + ) (let* ((serversdat (server:get-servers-info areapath)) (servkeys (hash-table-keys serversdat)) - (by-time-asc (if (not (null? servkeys)) + (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last (sort servkeys ;; list of "host:port" (lambda (a b) (>= (list-ref (hash-table-ref serversdat a) 2) (list-ref (hash-table-ref serversdat b) 2)))) '()))) + (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) + (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) (if (not (null? by-time-asc)) (let* ((oldest (last by-time-asc)) (oldest-dat (hash-table-ref serversdat oldest)) (host (list-ref oldest-dat 0)) (all-valid (filter (lambda (x) (equal? host (list-ref (hash-table-ref serversdat x) 0))) by-time-asc)) - (best-five (lambda () - (if (> (length all-valid) 5) - (take all-valid 5) - all-valid))) + (best-ten (lambda () + (if (> (length all-valid) 11) + (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out + (if (> (length all-valid) 8) + (drop-right all-valid 1) + all-valid)))) (names->dats (lambda (names) (map (lambda (x) (hash-table-ref serversdat x)) names))) (am-home? (lambda () @@ -488,25 +520,54 @@ (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) (print "youngest: "(hash-table-ref serversdat (car all-valid)))) ((home) host) ((homehost) (cons host (am-home?))) ;; shut up old code ((home?) (am-home?)) - ((best-five)(names->dats (best-five))) + ((best-ten)(names->dats (best-ten))) ((all-valid)(names->dats all-valid)) - ((best) (let* ((best-five (best-five)) - (len (length best-five))) - (hash-table-ref serversdat (list-ref best-five (random len))))) + ((best) (let* ((best-ten (best-ten)) + (len (length best-ten))) + (hash-table-ref serversdat (list-ref best-ten (random len))))) ((count)(length all-valid)) (else (debug:print 0 *default-log-port* "ERROR: invalid command "mode) #f))) (begin (server:run areapath) - (thread-sleep! 3) + (set! server-last-start (current-seconds)) + ;; (thread-sleep! 3) (case mode ((homehost) (cons #f #f)) (else #f)))))) + +(define (server:get-servinfo-dir areapath) + (let* ((spath (conc areapath"/.servinfo"))) + (if (not (file-exists? spath)) + (create-directory spath #t)) + spath)) + +(define (server:clean-up-old areapath) + ;; any server file that has not been touched in ten minutes is effectively dead + (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) + (for-each + (lambda (sfile) + (let* ((modtime (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) + (current-seconds)) + (file-modification-time sfile)))) + (if (and (number? modtime) + (> (- (current-seconds) modtime) + 600)) + (begin + (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") + (handle-exceptions + exn + (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) + (delete-file sfile)))))) + sfiles))) ;; would like to eventually get rid of this ;; (define (common:on-homehost?) (server:choose-server *toppath* 'home?)) @@ -516,11 +577,17 @@ ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least seconds old ;; (server:wait-for-server-start-last-flag areapath) - (if (< (server:choose-server areapath 'count) 10) + (let loop () + (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2) + (begin + (if (common:low-noise-print 30 "our-host-load") + (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server.")) + (loop)))) + (if (< (server:choose-server areapath 'count) 20) (server:run areapath)) #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((lock-file (conc areapath "/logs/server-start.lock"))) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 25) @@ -538,11 +605,12 @@ (let loop ((server-info (server:check-if-running areapath)) (try-num 0)) (if (or server-info (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. (server:record->url server-info) - (let ((num-ok (length (server:choose-server areapath 'all-valid)))) + (let* ( (servers (server:choose-server areapath 'all-valid)) + (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0))) (if (and (> try-num 0) ;; first time through simply wait a little while then try again (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one (server:run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath) @@ -555,11 +623,11 @@ ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; (define (server:check-if-running areapath) ;; #!key (numservers "2")) (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed - (servers (server:choose-server areapath 'best-five))) ;; (server:get-best (server:get-list areapath)))) + (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath)))) (if (or (and servers (null? servers)) (not servers)) ;; (and (list? servers) ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers @@ -596,48 +664,43 @@ ;; called in megatest.scm, host-port is string hostname:port ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running ;; in the same process as the server. ;; -(define (server:ping host-port-in server-id #!key (do-exit #f)) - (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - #f ;; (server:check-if-running *toppath*) - ;; (if (number? host-port-in) ;; we were handed a server-id - ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; ;; (print "srec: " srec " host-port-in: " host-port-in) - ;; (if srec - ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - ;; (conc "no such server-id " host-port-in))) - host-port-in))) ;; ) - (let* ((host-port (if host:port - (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f)) - #f))) -;; (toppath (launch:setup))) - ;; (print "host-port=" host-port) - (if (not host-port) - (begin - (if host-port-in - (debug:print 0 *default-log-port* "ERROR: bad host:port")) - (if do-exit (exit 1)) - #f) - (let* ((iface (car host-port)) - (port (cadr host-port)) - (server-dat (http-transport:client-connect iface port server-id)) - (login-res (rmt:login-no-auto-client-setup server-dat))) - (if (and (list? login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f))))))) +(define (server:ping host:port server-id #!key (do-exit #f)) + (let* ((host-port (cond + ((string? host:port) + (let ((slst (string-split host:port ":"))) + (if (eq? (length slst) 2) + (list (car slst)(string->number (cadr slst))) + #f))) + (else + #f)))) + (cond + ((and (list? host-port) + (eq? (length host-port) 2)) + (let* ((myrunremote (make-remote)) + (iface (car host-port)) + (port (cadr host-port)) + (server-dat (client:connect iface port server-id myrunremote)) + (login-res (rmt:login-no-auto-client-setup myrunremote))) + (if (and (list? login-res) + (car login-res)) + (begin + ;; (print "LOGIN_OK") + (if do-exit (exit 0)) + #t) + (begin + ;; (print "LOGIN_FAILED") + (if do-exit (exit 1)) + #f)))) + (else + (if host:port + (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) + (if do-exit + (exit 1) + #f))))) ;; run ping in separate process, safest way in some cases ;; (define (server:ping-server ifaceport) (with-input-from-pipe @@ -668,11 +731,11 @@ (define (server:expiration-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below (* 3600 (string->number tmo)) - 60))) + 600))) (define (server:get-best-guess-address hostname) (let ((res #f)) (for-each (lambda (adr)