Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,11 +28,11 @@ ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = +MSRCFILES = dbmod.scm # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ # rmtmod.scm apimod.scm @@ -151,11 +151,11 @@ # Special dependencies for the includes $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm # common.o : mofiles/commonmod.o megatest-fossil-hash.scm -mofiles/dbmod.o : mofiles/configfmod.o +# mofiles/dbmod.o : mofiles/configfmod.o # commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm tests.o db.o launch.o runs.o dashboard-tests.o \ dashboard-context-menu.o dashboard-guimonitor.o dashboard-main.o \ @@ -162,10 +162,11 @@ monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm +db.o api.o : mofiles/dbmod.o tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -23,11 +23,14 @@ (use srfi-69 posix) (declare (unit api)) (declare (uses rmt)) (declare (uses db)) +(declare (uses dbmod)) (declare (uses tasks)) + +(import dbmod) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -817,14 +817,14 @@ (define *common:ended-states* ;; states which indicate the test is stopped and will not proceed '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" )) (define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked - '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD")) + '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD" "CHECK")) (define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed - '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")) + '("PASS" "WARN" "WAIVED" "SKIP")) ;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items (define *common:running-states* ;; test is either running or can be run '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) @@ -2208,37 +2208,31 @@ ;; (define (common:wait-for-cpuload maxnormload numcpus-in #!key (count 1000) (msg #f)(remote-host #f)(num-tries 5)) (let* ((loadavg (common:get-cpu-load remote-host)) - ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again - (numcpus (if (<= 1 numcpus-in) - (common:get-num-cpus remote-host) - numcpus-in)) - (first (car loadavg)) - (next (cadr loadavg)) - (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug - ;; where numcpus - ;; (or could be - ;; maxload) is - ;; zero, crude - ;; fallback is to - ;; at least use 1 - ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit - ;; etc. - (effective-load (common:get-intercept first next)) - (recommended-delay (common:get-delay effective-load numcpus)) - (effective-host (or remote-host "localhost")) - (normalized-effective-load (/ effective-load numcpus)) - (will-wait (> normalized-effective-load maxnormload))) - (if (> recommended-delay 1) - (let* ((actual-delay (min recommended-delay 30))) - (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) - (debug:print-info 0 *default-log-port* "Load control, delaying " + ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again + (numcpus (if (<= 1 numcpus-in) + (common:get-num-cpus remote-host) numcpus-in)) + (first (car loadavg)) + (next (cadr loadavg)) + (adjmaxload (* maxnormload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude + ;; fallback is to at least use 1 + ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit + ;; etc. + (effective-load (common:get-intercept first next)) + (recommended-delay (common:get-delay effective-load numcpus)) + (effective-host (or remote-host "localhost")) + (normalized-effective-load (/ effective-load numcpus)) + (will-wait (> normalized-effective-load maxnormload))) + (if (and will-wait (> recommended-delay 1)) + (let* ((actual-delay (min recommended-delay 30))) + (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) + (debug:print-info 0 *default-log-port* "Load control, delaying " actual-delay " seconds to maintain safe load. current normalized effective load is " normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) - (thread-sleep! actual-delay))) + (thread-sleep! actual-delay))) (cond ;; bad data, try again to get the data ((not will-wait) (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) @@ -2634,11 +2628,11 @@ (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path)) (if (string-match "^.*/isoenv-core/.*" path) (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**"))))) -(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES"))) +(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES" "HOSTNAME"))) ;;(bb-check-path msg: "save-environment-as-files entry") (let ((envvars (get-environment-variables)) (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) (mungeval (lambda (val) (cond Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -52,10 +52,14 @@ (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) (append newalist (list (if metadata (list key val metadata) (list key val)))))) +;; this is used in megatestqa/ext.scm. +;; remove it from here and there by 12/31/21 +;; (define config:assoc-safe-add configf:assoc-safe-add) + (define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) (hash-table-set! cfgdat section-name (configf:assoc-safe-add (hash-table-ref/default cfgdat section-name '()) var value metadata: metadata))) @@ -144,11 +148,11 @@ (set! result (conc "#{(" cmdtype ") " cmd "}")))) (case cmdsym ((system shell scheme) (let ((delta (- (current-seconds) start-time))) (if (> delta 2) - (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) + (debug:print-info 2 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result))))) (loop (conc prestr result poststr))) res)) res))) @@ -531,12 +535,12 @@ (define configf:read-file read-config) ;; safely look up a value that is expected to be a number, return ;; a default (#f unless provided) ;; -(define (configf:lookup-number cfdat section varname #!key (default #f)) - (let* ((val (configf:lookup *configdat* section varname)) +(define (configf:lookup-number cfgdat section varname #!key (default #f)) + (let* ((val (configf:lookup cfgdat section varname)) (res (if val (string->number (string-substitute "\\s+" "" val #t)) #f))) (cond (res res) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -953,15 +953,13 @@ (when (> elapsed-time 2) (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.") (let* ((old-val (iup:attribute *tim* "TIME")) (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val))))))) (if (< (string->number new-val) 5000) - ((debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) - (iup:attribute-set! *tim* "TIME" new-val)))) - - - ) + (begin + (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) + (iup:attribute-set! *tim* "TIME" new-val))))) (dboard:tabdat-allruns-set! tabdat new-res) maxtests) (if (> (dboard:rundat-run-data-offset run-struct) 0) (loop run tal new-res newmaxtests) ;; not done getting data for this run (loop (car tal)(cdr tal) new-res newmaxtests))))))) Index: datashare.scm ================================================================== --- datashare.scm +++ datashare.scm @@ -14,812 +14,811 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . -(use ssax) -(use sxml-serializer) -(use sxml-modifications) -(use regex) -(use srfi-69) -(use regex-case) -(use posix) -(use json) -(use csv) -(use srfi-18) -(use format) - -(require-library iup) -(import (prefix iup iup:)) -(require-library ini-file) -(import (prefix ini-file ini:)) - -(use canvas-draw) -(import canvas-draw-iup) - -(use sqlite3 srfi-1 posix regex regex-case srfi-69) -(import (prefix sqlite3 sqlite3:)) - -(declare (uses configf)) -(declare (uses tree)) -(declare (uses margs)) -;; (declare (uses dcommon)) -;; (declare (uses launch)) -;; (declare (uses gutils)) -;; (declare (uses db)) -;; (declare (uses synchash)) -;; (declare (uses server)) -;; (declare (uses megatest-version)) -;; (declare (uses tbd)) - -(include "megatest-fossil-hash.scm") - -;; -;; GLOBALS -;; -(define *datashare:current-tab-number* 0) -(define *args-hash* (make-hash-table)) -(define datashare:help (conc "Usage: datashare [action [params ...]] - -Note: run datashare without parameters to start the gui. - - list-areas : List the allowed areas - - list-versions : List versions available in - options : -full, -vpatt patt - - publish : Publish data for area and with version - - get : Get a link to data, put the link in destpath - options : -i iteration - - update : Update the link to data to the latest iteration. - -Part of the Megatest tool suite. -Learn more at http://www.kiatoa.com/fossils/megatest - -Version: " megatest-fossil-hash)) ;; " - -;;====================================================================== -;; RECORDS -;;====================================================================== - -;; make-vector-record "testing" datastore pkg id area version_name store_type copied source_path iteration submitter datetime storegrp datavol quality disk_id comment -;; testing -(define (make-datashare:pkg)(make-vector 15)) -(define-inline (datashare:pkg-get-id vec) (vector-ref vec 0)) -(define-inline (datashare:pkg-get-area vec) (vector-ref vec 1)) -(define-inline (datashare:pkg-get-version_name vec) (vector-ref vec 2)) -(define-inline (datashare:pkg-get-store_type vec) (vector-ref vec 3)) -(define-inline (datashare:pkg-get-copied vec) (vector-ref vec 4)) -(define-inline (datashare:pkg-get-source_path vec) (vector-ref vec 5)) -(define-inline (datashare:pkg-get-iteration vec) (vector-ref vec 6)) -(define-inline (datashare:pkg-get-submitter vec) (vector-ref vec 7)) -(define-inline (datashare:pkg-get-datetime vec) (vector-ref vec 8)) -(define-inline (datashare:pkg-get-storegrp vec) (vector-ref vec 9)) -(define-inline (datashare:pkg-get-datavol vec) (vector-ref vec 10)) -(define-inline (datashare:pkg-get-quality vec) (vector-ref vec 11)) -(define-inline (datashare:pkg-get-disk_id vec) (vector-ref vec 12)) -(define-inline (datashare:pkg-get-comment vec) (vector-ref vec 13)) -(define-inline (datashare:pkg-get-stored_path vec) (vector-ref vec 14)) -(define-inline (datashare:pkg-set-id! vec val)(vector-set! vec 0 val)) -(define-inline (datashare:pkg-set-area! vec val)(vector-set! vec 1 val)) -(define-inline (datashare:pkg-set-version_name! vec val)(vector-set! vec 2 val)) -(define-inline (datashare:pkg-set-store_type! vec val)(vector-set! vec 3 val)) -(define-inline (datashare:pkg-set-copied! vec val)(vector-set! vec 4 val)) -(define-inline (datashare:pkg-set-source_path! vec val)(vector-set! vec 5 val)) -(define-inline (datashare:pkg-set-iteration! vec val)(vector-set! vec 6 val)) -(define-inline (datashare:pkg-set-submitter! vec val)(vector-set! vec 7 val)) -(define-inline (datashare:pkg-set-datetime! vec val)(vector-set! vec 8 val)) -(define-inline (datashare:pkg-set-storegrp! vec val)(vector-set! vec 9 val)) -(define-inline (datashare:pkg-set-datavol! vec val)(vector-set! vec 10 val)) -(define-inline (datashare:pkg-set-quality! vec val)(vector-set! vec 11 val)) -(define-inline (datashare:pkg-set-disk_id! vec val)(vector-set! vec 12 val)) -(define-inline (datashare:pkg-set-comment! vec val)(vector-set! vec 13 val)) -(define-inline (datashare:pkg-set-stored_path! vec val)(vector-set! vec 14 val)) - -;;====================================================================== -;; DB -;;====================================================================== - -(define (datashare:initialize-db db) - (for-each - (lambda (qry) - (sqlite3:execute db qry)) - (list - "CREATE TABLE pkgs - (id INTEGER PRIMARY KEY, - area TEXT, - version_name TEXT, - store_type TEXT DEFAULT 'copy', - copied INTEGER DEFAULT 0, - source_path TEXT, - stored_path TEXT, - iteration INTEGER DEFAULT 0, - submitter TEXT, - datetime TIMESTAMP DEFAULT (strftime('%s','now')), - storegrp TEXT, - datavol INTEGER, - quality TEXT, - disk_id INTEGER, - comment TEXT);" - "CREATE TABLE refs - (id INTEGER PRIMARY KEY, - pkg_id INTEGER, - destlink TEXT);" - "CREATE TABLE disks - (id INTEGER PRIMARY KEY, - storegrp TEXT, - path TEXT);"))) - -(define (datashare:register-data db area version-name store-type submitter quality source-path comment) - (let ((iter-qry (sqlite3:prepare db "SELECT max(iteration) FROM pkgs WHERE area=? AND version_name=?;")) - (next-iteration 0)) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row - (lambda (iteration) - (if (and (number? iteration) - (>= iteration next-iteration)) - (set! next-iteration (+ iteration 1)))) - iter-qry area version-name) - ;; now store the data - (sqlite3:execute db "INSERT INTO pkgs (area,version_name,iteration,store_type,submitter,source_path,quality,comment) - VALUES (?,?,?,?,?,?,?,?);" - area version-name next-iteration (conc store-type) submitter source-path quality comment))) - (sqlite3:finalize! iter-qry) - next-iteration)) - -(define (datashare:get-id db area version-name iteration) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - "SELECT id FROM pkgs WHERE area=? AND version_name=? AND iteration=?;" - area version-name iteration) - res)) - -(define (datashare:set-stored-path db id path) - (sqlite3:execute db "UPDATE pkgs SET stored_path=? WHERE id=?;" path id)) - -(define (datashare:set-copied db id value) - (sqlite3:execute db "UPDATE pkgs SET copied=? WHERE id=?;" value id)) - -(define (datashare:get-pkg-record db area version-name iteration) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (apply vector a b))) - db - "SELECT * FROM pkgs WHERE area=? AND version_name=? AND iteration=?;" - area - version-name - iteration) - res)) - -;; take version-name iteration and register or update "lastest/0" -;; -(define (datashare:set-latest db id area version-name iteration) - (let* ((rec (datashare:get-pkg-record db area version-name iteration)) - (latest-id (datashare:get-id db area "latest" 0)) - (stored-path (datashare:pkg-get-stored_path rec))) - (if latest-id ;; have a record - bump the link pointer - (datashare:set-stored-path db latest-id stored-path) - (datashare:register-data db area "latest" 'link "auto" "na" stored-path "latest data")))) - -;; set a package ref, this is the location where the link back to the stored data -;; is put. -;; -;; if there is nothing at that location then the record can be removed -;; if there are no refs for a particular pkg-id then that pkg-id is a -;; candidate for removal -;; -(define (datashare:record-pkg-ref db pkg-id dest-link) - (sqlite3:execute db "INSERT INTO refs (pkg_id,destlink) VALUES (?,?);" pkg-id dest-link)) - -(define (datashare:count-refs db pkg-id) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (count) - (set! res count)) - db - "SELECT count(id) FROM refs WHERE pkg_id=?;" - pkg-id) - res)) - -;; Create the sqlite db -(define (datashare:open-db configdat) - (let ((path (configf:lookup configdat "database" "location"))) - (if (and path - (directory? path) - (file-read-access? path)) - (let* ((dbpath (conc path "/datashare.db")) - (writeable (file-write-access? dbpath)) - (dbexists (common:file-exists? dbpath)) - (handler (make-busy-timeout 136000))) - (handle-exceptions - exn - (begin - (debug:print 2 *default-log-port* "ERROR: problem accessing db " dbpath - ((condition-property-accessor 'exn 'message) exn)) - (exit)) - (set! db (sqlite3:open-database dbpath))) - (if *db-write-access* (sqlite3:set-busy-handler! db handler)) - (if (not dbexists) - (begin - (datashare:initialize-db db))) - db) - (print "ERROR: invalid path for storing database: " path)))) - -(define (open-run-close-exception-handling proc idb . params) - (handle-exceptions - exn - (let ((sleep-time (random 30)) - (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - (case err-status - ((busy) - (thread-sleep! sleep-time)) - (else - (print "EXCEPTION: database overloaded or unreadable.") - (print " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) - (print " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)) - (thread-sleep! sleep-time) - (print "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) - (apply open-run-close-exception-handling proc idb params)) - (apply open-run-close-no-exception-handling proc idb params))) - -(define (open-run-close-no-exception-handling proc idb . params) - ;; (print "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) - (let* ((db (cond - ((sqlite3:database? idb) idb) - ((not idb) (print "ERROR: cannot open-run-close with #f anymore")) - ((procedure? idb) (idb)) - (else (print "ERROR: cannot open-run-close with #f anymore")))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! dbstruct)) - ;; (print "open-run-close-no-exception-handling END" ) - res)) - -(define open-run-close open-run-close-no-exception-handling) - -(define (datashare:get-pkgs db area-filter version-filter iter-filter) - (let ((res '())) - (sqlite3:for-each-row ;; replace with fold ... - (lambda (a . b) - (set! res (cons (list->vector (cons a b)) res))) - db - (conc "SELECT id,area,version_name,store_type,copied,source_path,iteration,submitter,datetime,storegrp,datavol,quality,disk_id,comment,stored_path " - " FROM pkgs WHERE area like ? AND version_name LIKE ? AND iteration " iter-filter ";") - area-filter version-filter) - (reverse res))) - -(define (datashare:get-pkg db area-name version-name #!key (iteration #f)) - (let ((dat '()) - (res #f)) - (sqlite3:for-each-row ;; replace with fold ... - (lambda (a . b) - (set! dat (cons (list->vector (cons a b)) dat))) - db - (conc "SELECT id,area,version_name,store_type,copied,source_path,iteration,submitter,datetime,storegrp,datavol,quality,disk_id,comment,stored_path " - " FROM pkgs WHERE area=? AND version_name=? ORDER BY iteration ASC;") - area-name version-name) - ;; now filter for iteration, either max if #f or specific one - (if (null? dat) - #f - (let loop ((hed (car dat)) - (tal (cdr dat)) - (cur 0)) - (let ((itr (datashare:pkg-get-iteration hed))) - (if (equal? itr iteration) ;; this is the one if iteration is specified - hed - (if (null? tal) - hed - (loop (car tal)(cdr tal))))))))) - -(define (datashare:get-versions-for-area db area-name #!key (version-patt #f)) - (let ((res '()) - (data (make-hash-table))) - (sqlite3:for-each-row - (lambda (version-name submitter iteration submitted-time comment) - ;; 0 1 2 3 4 - (hash-table-set! data version-name (vector version-name submitter iteration submitted-time comment))) - db - "SELECT version_name,submitter,iteration,datetime,comment FROM pkgs WHERE area='megatest' AND version_name != 'latest' AND version_name LIKE ? ORDER BY datetime asc;" - (or version-patt "%")) - (map (lambda (x)(hash-table-ref data x))(sort (hash-table-keys data) string-ci>=)))) - -;;====================================================================== -;; DATA IMPORT/EXPORT -;;====================================================================== - -(define (datashare:import-data configdat source-path dest-path area version iteration) - (let* ((space-avail (car dest-path)) - (disk-path (cdr dest-path)) - (targ-path (conc disk-path "/" area "/" version "/" iteration)) - (id (datashare:get-id db area version iteration)) - (db (datashare:open-db configdat))) - (if (> space-avail 10000) ;; dumb heuristic - (begin - (create-directory targ-path #t) - (datashare:set-stored-path db id targ-path) - (print "Running command: rsync -av " source-path "/ " targ-path "/") - (let ((th1 (make-thread (lambda () - (let ((pid (process-run "rsync" (list "-av" (conc source-path "/") (conc targ-path "/"))))) - (process-wait pid) - (datashare:set-copied db id "yes") - (sqlite3:finalize! db))) - "Data copy"))) - (thread-start! th1)) - #t) - (begin - (print "ERROR: Not enough space in storage area " dest-path) - (datashare:set-copied db id "no") - (sqlite3:finalize! db) - #f)))) - -(define (datashare:get-areas configdat) - (let* ((areadat (configf:get-section configdat "areas")) - (areas (if areadat (map car areadat) '()))) - areas)) - -(define (datashare:publish configdat publish-type area-name version comment spath submitter quality) - ;; input checks - (cond - ((not (member area-name (datashare:get-areas configdat))) - (cons #f (conc "Illegal area name \"" area-name "\""))) - (else - (let ((db (datashare:open-db configdat)) - (iteration (datashare:register-data db area-name version publish-type submitter quality spath comment)) - (dest-store (datashare:get-best-storage configdat))) - (if iteration - (if (eq? 'copy publish-type) - (begin - (datashare:import-data configdat spath dest-store area-name version iteration) - (let ((id (datashare:get-id db area-name version iteration))) - (datashare:set-latest db id area-name version iteration))) - (let ((id (datashare:get-id db area-name version iteration))) - (datashare:set-stored-path db id spath) - (datashare:set-copied db id "yes") - (datashare:set-copied db id "n/a") - (datashare:set-latest db id area-name version iteration))) - (print "ERROR: Failed to get an iteration number")) - (sqlite3:finalize! db) - (cons #t "Successfully saved data"))))) - -(define (datashare:get-best-storage configdat) - (let* ((storage (configf:lookup configdat "settings" "storage")) - (store-areas (if storage (string-split storage) '()))) - (print "Looking for available space in " store-areas) - (datashare:find-most-space store-areas))) - -;; (string->number (list-ref (with-input-from-pipe "df -B1000000 /tmp" (lambda ()(read-line)(string-split (read-line)))) 3)) - -(define (datashare:find-most-space paths) - (fold (lambda (area res) - ;; (print "area=" area " res=" res) - (let ((maxspace (car res)) - (currpath (cdr res))) - ;; (print currpath " " maxspace) - (if (file-write-access? area) - (let ((currspace (string->number - (list-ref - (with-input-from-pipe - ;; (conc "df --output=avail " area) - (conc "df -B1000000 " area) - ;; (lambda ()(read)(read)) - (lambda ()(read-line)(string-split (read-line)))) - 3)))) - (if (> currspace maxspace) - (cons currspace area) - res)) - res))) - (cons 0 #f) - paths)) - -;; remove existing link and if possible ... -;; create path to next of tip of target, create link back to source -(define (datashare:build-dir-make-link source target) - (if (common:file-exists? target)(datashare:backup-move target)) - (create-directory (pathname-directory target) #t) - (create-symbolic-link source target)) - -(define (datashare:backup-move path) - (let* ((trashdir (conc (pathname-directory path) "/.trash")) - (trashfile (conc trashdir "/" (current-seconds) "-" (pathname-file path)))) - (create-directory trashdir #t) - (if (directory? path) - (system (conc "mv " path " " trashfile)) - (file-move path trash-file)))) - -;;====================================================================== -;; GUI -;;====================================================================== - -;; The main menu -(define (datashare:main-menu) - (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) - (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options - (iup:menu-item "Open" action: (lambda (obj) - (iup:show (iup:file-dialog)) - (print "File->open " obj))) - (iup:menu-item "Save" #:action (lambda (obj)(print "File->save " obj))) - (iup:menu-item "Exit" #:action (lambda (obj)(exit))))) - (iup:menu-item "Tools" (iup:menu - (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah"))) - ;; (iup:menu-item "Show dialog" #:action (lambda (obj) - ;; (show message-window - ;; #:modal? #t - ;; ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current - ;; ;; #:x 'mouse - ;; ;; #:y 'mouse - ;; ) - )))) - -(define (datashare:publish-view configdat) - ;; (pp (hash-table->alist configdat)) - (let* ((areas (configf:get-section configdat "areas")) - (label-size "70x") - (areas-sel (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES")) - (version-tb (iup:textbox #:expand "HORIZONTAL")) ;; #:size "50x")) - (areas-sel (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES")) - (component (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES" )) - (version-val (iup:textbox #:expand "HORIZONTAL" #:size "50x")) - ;; (copy-link (iup:toggle #:expand "HORIZONTAL")) - ;; (iteration (iup:textbox #:expand "YES" #:size "20x")) - ;; (iteration (iup:textbox #:expand "HORIZONTAL" #:size "20x")) - (area-filter (iup:textbox #:expand "HORIZONTAL" #:value "%")) - (comment-tb (iup:textbox #:expand "YES" #:multiline "YES")) - (source-tb (iup:textbox #:expand "HORIZONTAL" - #:value (or (configf:lookup configdat "settings" "basepath") - ""))) - (publish (lambda (publish-type) - (let* ((area-num (or (string->number (iup:attribute areas-sel "VALUE")) 0)) - (area-dat (if (> area-num 0)(list-ref areas (- area-num 1))'("NOT SELECTED" "NOT SELECTED"))) - (area-path (cadr area-dat)) - (area-name (car area-dat)) - (version (iup:attribute version-tb "VALUE")) - (comment (iup:attribute comment-tb "VALUE")) - (spath (iup:attribute source-tb "VALUE")) - (submitter (current-user-name)) - (quality 2)) - (datashare:publish configdat publish-type area-name version comment spath submitter quality)))) - (copy (iup:button "Copy and Publish" - #:expand "HORIZONTAL" - #:action (lambda (obj) - (publish 'copy)))) - (link (iup:button "Link and Publish" - #:expand "HORIZONTAL" - #:action (lambda (obj) - (publish 'link)))) - (browse-btn (iup:button "Browse" - #:size "40x" - #:action (lambda (obj) - (let* ((fd (iup:file-dialog #:dialogtype "DIR")) - (top (iup:show fd #:modal? "YES"))) - (iup:attribute-set! source-tb "VALUE" - (iup:attribute fd "VALUE")) - (iup:destroy! fd)))))) - (print "areas") - ;; (pp areas) - (fold (lambda (areadat num) - ;; (print "Adding num=" num ", areadat=" areadat) - (iup:attribute-set! areas-sel (conc num) (car areadat)) - (+ 1 num)) - 1 areas) - (iup:vbox - (iup:hbox (iup:label "Area:" #:size label-size) ;; area-filter - areas-sel) - (iup:hbox (iup:label "Version:" #:size label-size) version-tb) - ;; (iup:hbox (iup:label "Link only" #:size label-size) copy-link) - ;; (iup:label "Iteration:") iteration) - (iup:hbox (iup:label "Comment:" #:size label-size) comment-tb) - (iup:hbox (iup:label "Source base path:" #:size label-size) source-tb browse-btn) - (iup:hbox copy link)))) - -(define (datashare:lst->path pathlst) - (conc "/" (string-intersperse (map conc pathlst) "/"))) - -(define (datashare:path->lst path) - (string-split path "/")) - -(define (datashare:pathdat-apply-heuristics configdat path) - (cond - ((common:file-exists? path) "found") - (else (conc path " not installed")))) - -(define (datashare:get-view configdat) - (iup:vbox - (iup:hbox - (let* ((label-size "60x") - ;; filter elements - (area-filter "%") - (version-filter "%") - (iter-filter ">= 0") - ;; reverse lookup from path to data for src and installed - (srcdat (make-hash-table)) ;; reverse lookup - (installed-dat (make-hash-table)) - ;; config values - (basepath (configf:lookup configdat "settings" "basepath")) - ;; gui elements - (submitter (iup:label "" #:expand "HORIZONTAL")) - (date-submitted (iup:label "" #:expand "HORIZONTAL")) - (comment (iup:label "" #:expand "HORIZONTAL")) - (copy-link (iup:label "" #:expand "HORIZONTAL")) - (quality (iup:label "" #:expand "HORIZONTAL")) - (installed-status (iup:label "" #:expand "HORIZONTAL")) - ;; misc - (curr-record #f) - ;; (source-data (iup:label "" #:expand "HORIZONTAL")) - (tb (iup:treebox - #:value 0 - #:name "Packages" - #:expand "YES" - #:addexpanded "NO" - #:selection-cb - (lambda (obj id state) - ;; (print "obj: " obj ", id: " id ", state: " state) - (let* ((path (datashare:lst->path (cdr (tree:node->path obj id)))) - (record (hash-table-ref/default srcdat path #f))) - (if record - (begin - (set! curr-record record) - (iup:attribute-set! submitter "TITLE" (datashare:pkg-get-submitter record)) - (iup:attribute-set! date-submitted "TITLE" (time->string (seconds->local-time (datashare:pkg-get-datetime record)))) - (iup:attribute-set! comment "TITLE" (datashare:pkg-get-comment record)) - (iup:attribute-set! quality "TITLE" (datashare:pkg-get-quality record)) - (iup:attribute-set! copy-link "TITLE" (datashare:pkg-get-store_type record)) - )) - ;; (print "id=" id " path=" path " record=" record);; (tree:node->path obj id) " run-id: " run-id) - )))) - (tb2 (iup:treebox - #:value 0 - #:name "Installed" - #:expand "YES" - #:addexpanded "NO" - #:selection-cb - (lambda (obj id state) - ;; (print "obj: " obj ", id: " id ", state: " state) - (let* ((path (datashare:lst->path (cdr (tree:node->path obj id)))) - (status (hash-table-ref/default installed-dat path #f))) - (iup:attribute-set! installed-status "TITLE" (if status status "")) - )))) - (refresh (lambda (obj) - (let* ((db (datashare:open-db configdat)) - (areas (or (configf:get-section configdat "areas") '()))) - ;; - ;; first update the Sources - ;; - (for-each - (lambda (pkgitem) - (let* ((pkg-path (list (datashare:pkg-get-area pkgitem) - (datashare:pkg-get-version_name pkgitem) - (datashare:pkg-get-iteration pkgitem))) - (pkg-id (datashare:pkg-get-id pkgitem)) - (path (datashare:lst->path pkg-path))) - ;; (print "tree:add-node tb=" tb ", pkg-path=" pkg-path ", pkg-id=" pkg-id) - (if (not (hash-table-ref/default srcdat path #f)) - (tree:add-node tb "Packages" pkg-path userdata: (conc "pkg-id: " pkg-id))) - ;; (print "path=" path " pkgitem=" pkgitem) - (hash-table-set! srcdat path pkgitem))) - (datashare:get-pkgs db area-filter version-filter iter-filter)) - ;; - ;; then update the installed - ;; - (for-each - (lambda (area) - (let* ((path (conc "/" (cadr area))) - (fullpath (conc basepath path))) - (if (not (hash-table-ref/default installed-dat path #f)) - (tree:add-node tb2 "Installed" (datashare:path->lst path))) - (hash-table-set! installed-dat path (datashare:pathdat-apply-heuristics configdat fullpath)))) - areas) - (sqlite3:finalize! db)))) - (apply (iup:button "Apply" - #:action - (lambda (obj) - (if curr-record - (let* ((area (datashare:pkg-get-area curr-record)) - (stored-path (datashare:pkg-get-stored_path curr-record)) - (source-type (datashare:pkg-get-store_type curr-record)) - (source-path (case source-type ;; (equal? source-type "link")) - ((link)(datashare:pkg-get-source-path curr-record)) - ((copy)stored-path) - (else #f))) - (dest-stub (configf:lookup configdat "areas" area)) - (target-path (conc basepath "/" dest-stub))) - (datashare:build-dir-make-link stored-path target-path) - (print "Creating link from " stored-path " to " target-path))))))) - (iup:vbox - (iup:hbox tb tb2) - (iup:frame - #:title "Source Info" - (iup:vbox - (iup:hbox (iup:button "Refresh" #:action refresh) apply) - (iup:hbox (iup:label "Submitter: ") ;; #:size label-size) - submitter - (iup:label "Submitted on: ") ;; #:size label-size) - date-submitted) - (iup:hbox (iup:label "Data stored: ") - copy-link - (iup:label "Quality: ") - quality) - (iup:hbox (iup:label "Comment: ") - comment))) - (iup:frame - #:title "Installed Info" - (iup:vbox - (iup:hbox (iup:label "Installed status/path: ") installed-status))) - ))))) - -(define (datashare:manage-view configdat) - (iup:vbox - (iup:hbox - (iup:button "Pushme" - #:expand "YES" - )))) - -(define (datashare:gui configdat) - (iup:show - (iup:dialog - #:title (conc "DataShare dashboard " (current-user-name) ":" (current-directory)) - #:menu (datashare:main-menu) - (let* ((tabs (iup:tabs - #:tabchangepos-cb (lambda (obj curr prev) - (set! *datashare:current-tab-number* curr)) - (datashare:publish-view configdat) - (datashare:get-view configdat) - (datashare:manage-view configdat) - ))) - ;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c))) - (iup:attribute-set! tabs "TABTITLE0" "Publish") - (iup:attribute-set! tabs "TABTITLE1" "Get") - (iup:attribute-set! tabs "TABTITLE2" "Manage") - ;; (iup:attribute-set! tabs "BGCOLOR" "190 190 190") - tabs))) - (iup:main-loop)) - -;;====================================================================== -;; MISC -;;====================================================================== - - -(define (datashare:do-as-calling-user proc) - (let ((eid (current-effective-user-id)) - (cid (current-user-id))) - (if (not (eq? eid cid)) ;; running suid - (set! (current-effective-user-id) cid)) - ;; (print "running as " (current-effective-user-id)) - (proc) - (if (not (eq? eid cid)) - (set! (current-effective-user-id) eid)))) - -(define (datashare:find name paths) - (if (null? paths) - #f - (let loop ((hed (car paths)) - (tal (cdr paths))) - (if (common:file-exists? (conc hed "/" name)) - hed - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))) - -;;====================================================================== -;; MAIN -;;====================================================================== - -(define (datashare:load-config exe-dir exe-name) - (let* ((fname (conc exe-dir "/." exe-name ".config"))) - (ini:property-separator-patt " * *") - (ini:property-separator #\space) - (if (common:file-exists? fname) - ;; (ini:read-ini fname) - (read-config fname #f #t) - (make-hash-table)))) - -(define (datashare:process-action configdat action . args) - (case (string->symbol action) - ((get) - (if (< (length args) 2) - (begin - (print "ERROR: Missing arguments; " (string-intersperse args ", ")) - (exit 1)) - (let* ((basepath (configf:lookup configdat "settings" "basepath")) - (db (datashare:open-db configdat)) - (area (car args)) - (version (cadr args)) ;; iteration - (remargs (args:get-args args '("-i") '() args:arg-hash 0)) - (iteration (if (args:get-arg "-i")(string->number (args:get-arg "-i")) #f)) - (curr-record (datashare:get-pkg db area version iteration: iteration))) - (if (not curr-record) - (begin - (print "ERROR: No matching record found; area=" area ", version=" version ", iteration=" (if iteration iteration "(max)")) - (exit 1)) - (let* ((stored-path (datashare:pkg-get-stored_path curr-record)) - (source-type (datashare:pkg-get-store_type curr-record)) - (source-path (case source-type ;; (equal? source-type "link")) - ((link) (datashare:pkg-get-source-path curr-record)) - ((copy) stored-path) - (else #f))) - (dest-stub (configf:lookup configdat "areas" area)) - (target-path (conc basepath "/" dest-stub))) - (datashare:build-dir-make-link stored-path target-path) - (datashare:record-pkg-ref db (datashare:pkg-get-id curr-record) target-path) - (sqlite3:finalize! db) - (print "Creating link from " stored-path " to " target-path)))))) - ((publish) - (if (< (length args) 3) - (begin - (print "ERROR: Missing arguments; " (string-intersperse args ", ")) - (exit 1)) - (let* ((srcpath (list-ref args 0)) - (areaname (list-ref args 1)) - (version (list-ref args 2)) - (remargs (args:get-args (drop args 2) - '("-type" ;; link or copy (default is copy) - "-m") - '() - args:arg-hash - 0)) - (publish-type (if (equal? (args:get-arg "-type") "link") 'link 'copy)) - (comment (or (args:get-arg "-m") "")) - (submitter (current-user-name)) - (quality (args:get-arg "-quality")) - (publish-res (datashare:publish configdat publish-type areaname version comment srcpath submitter quality))) - (if (not (car publish-res)) - (begin - (print "ERROR: " (cdr publish-res)) - (exit 1)))))) - ((list-versions) - (let ((area-name (car args)) ;; version patt full print - (remargs (args:get-args args '("-vpatt") '("-full") args:arg-hash 0)) - (db (datashare:open-db configdat)) - (versions (datashare:get-versions-for-area db (car args) version-patt: (args:get-arg "-vpatt")))) - ;; (print "area-name=" area-name " args=" args " *args-hash*=" (hash-table->alist *args-hash*)) - (map (lambda (x) - (if (args:get-arg "-full") - (format #t - "~10a~10a~4a~27a~30a\n" - (vector-ref x 0) - (vector-ref x 1) - (vector-ref x 2) - (conc "\"" (time->string (seconds->local-time (vector-ref x 3))) "\"") - (conc "\"" (vector-ref x 4) "\"")) - (print (vector-ref x 0)))) - versions) - (sqlite3:finalize! db))))) - -;; ease debugging by loading ~/.dashboardrc - REMOVE FROM PRODUCTION! -(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.datasharerc"))) - (if (common:file-exists? debugcontrolf) - (load debugcontrolf))) - -(define (main) - (let* ((args (argv)) - (prog (car args)) - (rema (cdr args)) - (exe-name (pathname-file (car (argv)))) - (exe-dir (or (pathname-directory prog) - (datashare:find exe-name (string-split (get-environment-variable "PATH") ":")))) - (configdat (datashare:load-config exe-dir exe-name))) - (cond - ;; one-word commands - ((eq? (length rema) 1) - (case (string->symbol (car rema)) - ((help -h -help --h --help) - (print datashare:help)) - ((list-areas) - (map print (datashare:get-areas configdat))) - (else - (print "ERROR: Unrecognised command. Try \"datashare help\"")))) - ;; multi-word commands - ((null? rema)(datashare:gui configdat)) - ((>= (length rema) 2) - (apply datashare:process-action configdat (car rema)(cdr rema))) - (else (print "ERROR: Unrecognised command. Try \"datashare help\""))))) - -(main) +;; ==> (module datashare +;; ==> (use ssax) +;; ==> (use sxml-serializer) +;; ==> (use sxml-modifications) +;; ==> (use regex) +;; ==> (use srfi-69) +;; ==> (use regex-case) +;; ==> (use posix) +;; ==> (use json) +;; ==> (use csv) +;; ==> (use srfi-18) +;; ==> (use format) +;; ==> +;; ==> (use (prefix iup iup:)) +;; ==> (import (prefix ini-file ini:)) +;; ==> +;; ==> (use canvas-draw) +;; ==> (import canvas-draw-iup) +;; ==> +;; ==> (use sqlite3 srfi-1 posix regex regex-case srfi-69) +;; ==> (import (prefix sqlite3 sqlite3:)) +;; ==> +;; ==> (declare (uses configf)) +;; ==> (declare (uses tree)) +;; ==> (declare (uses margs)) +;; ==> ;; (declare (uses dcommon)) +;; ==> ;; (declare (uses launch)) +;; ==> ;; (declare (uses gutils)) +;; ==> ;; (declare (uses db)) +;; ==> ;; (declare (uses synchash)) +;; ==> ;; (declare (uses server)) +;; ==> ;; (declare (uses megatest-version)) +;; ==> ;; (declare (uses tbd)) +;; ==> +;; ==> (include "megatest-fossil-hash.scm") +;; ==> +;; ==> ;; +;; ==> ;; GLOBALS +;; ==> ;; +;; ==> (define *datashare:current-tab-number* 0) +;; ==> (define *args-hash* (make-hash-table)) +;; ==> (define datashare:help (conc "Usage: datashare [action [params ...]] +;; ==> +;; ==> Note: run datashare without parameters to start the gui. +;; ==> +;; ==> list-areas : List the allowed areas +;; ==> +;; ==> list-versions : List versions available in +;; ==> options : -full, -vpatt patt +;; ==> +;; ==> publish : Publish data for area and with version +;; ==> +;; ==> get : Get a link to data, put the link in destpath +;; ==> options : -i iteration +;; ==> +;; ==> update : Update the link to data to the latest iteration. +;; ==> +;; ==> Part of the Megatest tool suite. +;; ==> Learn more at http://www.kiatoa.com/fossils/megatest +;; ==> +;; ==> Version: " megatest-fossil-hash)) ;; " +;; ==> +;; ==> ;;====================================================================== +;; ==> ;; RECORDS +;; ==> ;;====================================================================== +;; ==> +;; ==> ;; make-vector-record "testing" datastore pkg id area version_name store_type copied source_path iteration submitter datetime storegrp datavol quality disk_id comment +;; ==> ;; testing +;; ==> (define (make-datashare:pkg)(make-vector 15)) +;; ==> (define-inline (datashare:pkg-get-id vec) (vector-ref vec 0)) +;; ==> (define-inline (datashare:pkg-get-area vec) (vector-ref vec 1)) +;; ==> (define-inline (datashare:pkg-get-version_name vec) (vector-ref vec 2)) +;; ==> (define-inline (datashare:pkg-get-store_type vec) (vector-ref vec 3)) +;; ==> (define-inline (datashare:pkg-get-copied vec) (vector-ref vec 4)) +;; ==> (define-inline (datashare:pkg-get-source_path vec) (vector-ref vec 5)) +;; ==> (define-inline (datashare:pkg-get-iteration vec) (vector-ref vec 6)) +;; ==> (define-inline (datashare:pkg-get-submitter vec) (vector-ref vec 7)) +;; ==> (define-inline (datashare:pkg-get-datetime vec) (vector-ref vec 8)) +;; ==> (define-inline (datashare:pkg-get-storegrp vec) (vector-ref vec 9)) +;; ==> (define-inline (datashare:pkg-get-datavol vec) (vector-ref vec 10)) +;; ==> (define-inline (datashare:pkg-get-quality vec) (vector-ref vec 11)) +;; ==> (define-inline (datashare:pkg-get-disk_id vec) (vector-ref vec 12)) +;; ==> (define-inline (datashare:pkg-get-comment vec) (vector-ref vec 13)) +;; ==> (define-inline (datashare:pkg-get-stored_path vec) (vector-ref vec 14)) +;; ==> (define-inline (datashare:pkg-set-id! vec val)(vector-set! vec 0 val)) +;; ==> (define-inline (datashare:pkg-set-area! vec val)(vector-set! vec 1 val)) +;; ==> (define-inline (datashare:pkg-set-version_name! vec val)(vector-set! vec 2 val)) +;; ==> (define-inline (datashare:pkg-set-store_type! vec val)(vector-set! vec 3 val)) +;; ==> (define-inline (datashare:pkg-set-copied! vec val)(vector-set! vec 4 val)) +;; ==> (define-inline (datashare:pkg-set-source_path! vec val)(vector-set! vec 5 val)) +;; ==> (define-inline (datashare:pkg-set-iteration! vec val)(vector-set! vec 6 val)) +;; ==> (define-inline (datashare:pkg-set-submitter! vec val)(vector-set! vec 7 val)) +;; ==> (define-inline (datashare:pkg-set-datetime! vec val)(vector-set! vec 8 val)) +;; ==> (define-inline (datashare:pkg-set-storegrp! vec val)(vector-set! vec 9 val)) +;; ==> (define-inline (datashare:pkg-set-datavol! vec val)(vector-set! vec 10 val)) +;; ==> (define-inline (datashare:pkg-set-quality! vec val)(vector-set! vec 11 val)) +;; ==> (define-inline (datashare:pkg-set-disk_id! vec val)(vector-set! vec 12 val)) +;; ==> (define-inline (datashare:pkg-set-comment! vec val)(vector-set! vec 13 val)) +;; ==> (define-inline (datashare:pkg-set-stored_path! vec val)(vector-set! vec 14 val)) +;; ==> +;; ==> ;;====================================================================== +;; ==> ;; DB +;; ==> ;;====================================================================== +;; ==> +;; ==> (define (datashare:initialize-db db) +;; ==> (for-each +;; ==> (lambda (qry) +;; ==> (sqlite3:execute db qry)) +;; ==> (list +;; ==> "CREATE TABLE pkgs +;; ==> (id INTEGER PRIMARY KEY, +;; ==> area TEXT, +;; ==> version_name TEXT, +;; ==> store_type TEXT DEFAULT 'copy', +;; ==> copied INTEGER DEFAULT 0, +;; ==> source_path TEXT, +;; ==> stored_path TEXT, +;; ==> iteration INTEGER DEFAULT 0, +;; ==> submitter TEXT, +;; ==> datetime TIMESTAMP DEFAULT (strftime('%s','now')), +;; ==> storegrp TEXT, +;; ==> datavol INTEGER, +;; ==> quality TEXT, +;; ==> disk_id INTEGER, +;; ==> comment TEXT);" +;; ==> "CREATE TABLE refs +;; ==> (id INTEGER PRIMARY KEY, +;; ==> pkg_id INTEGER, +;; ==> destlink TEXT);" +;; ==> "CREATE TABLE disks +;; ==> (id INTEGER PRIMARY KEY, +;; ==> storegrp TEXT, +;; ==> path TEXT);"))) +;; ==> +;; ==> (define (datashare:register-data db area version-name store-type submitter quality source-path comment) +;; ==> (let ((iter-qry (sqlite3:prepare db "SELECT max(iteration) FROM pkgs WHERE area=? AND version_name=?;")) +;; ==> (next-iteration 0)) +;; ==> (sqlite3:with-transaction +;; ==> db +;; ==> (lambda () +;; ==> (sqlite3:for-each-row +;; ==> (lambda (iteration) +;; ==> (if (and (number? iteration) +;; ==> (>= iteration next-iteration)) +;; ==> (set! next-iteration (+ iteration 1)))) +;; ==> iter-qry area version-name) +;; ==> ;; now store the data +;; ==> (sqlite3:execute db "INSERT INTO pkgs (area,version_name,iteration,store_type,submitter,source_path,quality,comment) +;; ==> VALUES (?,?,?,?,?,?,?,?);" +;; ==> area version-name next-iteration (conc store-type) submitter source-path quality comment))) +;; ==> (sqlite3:finalize! iter-qry) +;; ==> next-iteration)) +;; ==> +;; ==> (define (datashare:get-id db area version-name iteration) +;; ==> (let ((res #f)) +;; ==> (sqlite3:for-each-row +;; ==> (lambda (id) +;; ==> (set! res id)) +;; ==> db +;; ==> "SELECT id FROM pkgs WHERE area=? AND version_name=? AND iteration=?;" +;; ==> area version-name iteration) +;; ==> res)) +;; ==> +;; ==> (define (datashare:set-stored-path db id path) +;; ==> (sqlite3:execute db "UPDATE pkgs SET stored_path=? WHERE id=?;" path id)) +;; ==> +;; ==> (define (datashare:set-copied db id value) +;; ==> (sqlite3:execute db "UPDATE pkgs SET copied=? WHERE id=?;" value id)) +;; ==> +;; ==> (define (datashare:get-pkg-record db area version-name iteration) +;; ==> (let ((res #f)) +;; ==> (sqlite3:for-each-row +;; ==> (lambda (a . b) +;; ==> (set! res (apply vector a b))) +;; ==> db +;; ==> "SELECT * FROM pkgs WHERE area=? AND version_name=? AND iteration=?;" +;; ==> area +;; ==> version-name +;; ==> iteration) +;; ==> res)) +;; ==> +;; ==> ;; take version-name iteration and register or update "lastest/0" +;; ==> ;; +;; ==> (define (datashare:set-latest db id area version-name iteration) +;; ==> (let* ((rec (datashare:get-pkg-record db area version-name iteration)) +;; ==> (latest-id (datashare:get-id db area "latest" 0)) +;; ==> (stored-path (datashare:pkg-get-stored_path rec))) +;; ==> (if latest-id ;; have a record - bump the link pointer +;; ==> (datashare:set-stored-path db latest-id stored-path) +;; ==> (datashare:register-data db area "latest" 'link "auto" "na" stored-path "latest data")))) +;; ==> +;; ==> ;; set a package ref, this is the location where the link back to the stored data +;; ==> ;; is put. +;; ==> ;; +;; ==> ;; if there is nothing at that location then the record can be removed +;; ==> ;; if there are no refs for a particular pkg-id then that pkg-id is a +;; ==> ;; candidate for removal +;; ==> ;; +;; ==> (define (datashare:record-pkg-ref db pkg-id dest-link) +;; ==> (sqlite3:execute db "INSERT INTO refs (pkg_id,destlink) VALUES (?,?);" pkg-id dest-link)) +;; ==> +;; ==> (define (datashare:count-refs db pkg-id) +;; ==> (let ((res 0)) +;; ==> (sqlite3:for-each-row +;; ==> (lambda (count) +;; ==> (set! res count)) +;; ==> db +;; ==> "SELECT count(id) FROM refs WHERE pkg_id=?;" +;; ==> pkg-id) +;; ==> res)) +;; ==> +;; ==> ;; Create the sqlite db +;; ==> (define (datashare:open-db configdat) +;; ==> (let ((path (configf:lookup configdat "database" "location"))) +;; ==> (if (and path +;; ==> (directory? path) +;; ==> (file-read-access? path)) +;; ==> (let* ((dbpath (conc path "/datashare.db")) +;; ==> (writeable (file-write-access? dbpath)) +;; ==> (dbexists (common:file-exists? dbpath)) +;; ==> (handler (make-busy-timeout 136000))) +;; ==> (handle-exceptions +;; ==> exn +;; ==> (begin +;; ==> (debug:print 2 *default-log-port* "ERROR: problem accessing db " dbpath +;; ==> ((condition-property-accessor 'exn 'message) exn)) +;; ==> (exit)) +;; ==> (set! db (sqlite3:open-database dbpath))) +;; ==> (if *db-write-access* (sqlite3:set-busy-handler! db handler)) +;; ==> (if (not dbexists) +;; ==> (begin +;; ==> (datashare:initialize-db db))) +;; ==> db) +;; ==> (print "ERROR: invalid path for storing database: " path)))) +;; ==> +;; ==> (define (open-run-close-exception-handling proc idb . params) +;; ==> (handle-exceptions +;; ==> exn +;; ==> (let ((sleep-time (random 30)) +;; ==> (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) +;; ==> (case err-status +;; ==> ((busy) +;; ==> (thread-sleep! sleep-time)) +;; ==> (else +;; ==> (print "EXCEPTION: database overloaded or unreadable.") +;; ==> (print " message: " ((condition-property-accessor 'exn 'message) exn)) +;; ==> (print "exn=" (condition->list exn)) +;; ==> (print " status: " ((condition-property-accessor 'sqlite3 'status) exn)) +;; ==> (print-call-chain (current-error-port)) +;; ==> (thread-sleep! sleep-time) +;; ==> (print "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) +;; ==> (apply open-run-close-exception-handling proc idb params)) +;; ==> (apply open-run-close-no-exception-handling proc idb params))) +;; ==> +;; ==> (define (open-run-close-no-exception-handling proc idb . params) +;; ==> ;; (print "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) +;; ==> (let* ((db (cond +;; ==> ((sqlite3:database? idb) idb) +;; ==> ((not idb) (print "ERROR: cannot open-run-close with #f anymore")) +;; ==> ((procedure? idb) (idb)) +;; ==> (else (print "ERROR: cannot open-run-close with #f anymore")))) +;; ==> (res #f)) +;; ==> (set! res (apply proc db params)) +;; ==> (if (not idb)(sqlite3:finalize! dbstruct)) +;; ==> ;; (print "open-run-close-no-exception-handling END" ) +;; ==> res)) +;; ==> +;; ==> (define open-run-close open-run-close-no-exception-handling) +;; ==> +;; ==> (define (datashare:get-pkgs db area-filter version-filter iter-filter) +;; ==> (let ((res '())) +;; ==> (sqlite3:for-each-row ;; replace with fold ... +;; ==> (lambda (a . b) +;; ==> (set! res (cons (list->vector (cons a b)) res))) +;; ==> db +;; ==> (conc "SELECT id,area,version_name,store_type,copied,source_path,iteration,submitter,datetime,storegrp,datavol,quality,disk_id,comment,stored_path " +;; ==> " FROM pkgs WHERE area like ? AND version_name LIKE ? AND iteration " iter-filter ";") +;; ==> area-filter version-filter) +;; ==> (reverse res))) +;; ==> +;; ==> (define (datashare:get-pkg db area-name version-name #!key (iteration #f)) +;; ==> (let ((dat '()) +;; ==> (res #f)) +;; ==> (sqlite3:for-each-row ;; replace with fold ... +;; ==> (lambda (a . b) +;; ==> (set! dat (cons (list->vector (cons a b)) dat))) +;; ==> db +;; ==> (conc "SELECT id,area,version_name,store_type,copied,source_path,iteration,submitter,datetime,storegrp,datavol,quality,disk_id,comment,stored_path " +;; ==> " FROM pkgs WHERE area=? AND version_name=? ORDER BY iteration ASC;") +;; ==> area-name version-name) +;; ==> ;; now filter for iteration, either max if #f or specific one +;; ==> (if (null? dat) +;; ==> #f +;; ==> (let loop ((hed (car dat)) +;; ==> (tal (cdr dat)) +;; ==> (cur 0)) +;; ==> (let ((itr (datashare:pkg-get-iteration hed))) +;; ==> (if (equal? itr iteration) ;; this is the one if iteration is specified +;; ==> hed +;; ==> (if (null? tal) +;; ==> hed +;; ==> (loop (car tal)(cdr tal))))))))) +;; ==> +;; ==> (define (datashare:get-versions-for-area db area-name #!key (version-patt #f)) +;; ==> (let ((res '()) +;; ==> (data (make-hash-table))) +;; ==> (sqlite3:for-each-row +;; ==> (lambda (version-name submitter iteration submitted-time comment) +;; ==> ;; 0 1 2 3 4 +;; ==> (hash-table-set! data version-name (vector version-name submitter iteration submitted-time comment))) +;; ==> db +;; ==> "SELECT version_name,submitter,iteration,datetime,comment FROM pkgs WHERE area='megatest' AND version_name != 'latest' AND version_name LIKE ? ORDER BY datetime asc;" +;; ==> (or version-patt "%")) +;; ==> (map (lambda (x)(hash-table-ref data x))(sort (hash-table-keys data) string-ci>=)))) +;; ==> +;; ==> ;;====================================================================== +;; ==> ;; DATA IMPORT/EXPORT +;; ==> ;;====================================================================== +;; ==> +;; ==> (define (datashare:import-data configdat source-path dest-path area version iteration) +;; ==> (let* ((space-avail (car dest-path)) +;; ==> (disk-path (cdr dest-path)) +;; ==> (targ-path (conc disk-path "/" area "/" version "/" iteration)) +;; ==> (id (datashare:get-id db area version iteration)) +;; ==> (db (datashare:open-db configdat))) +;; ==> (if (> space-avail 10000) ;; dumb heuristic +;; ==> (begin +;; ==> (create-directory targ-path #t) +;; ==> (datashare:set-stored-path db id targ-path) +;; ==> (print "Running command: rsync -av " source-path "/ " targ-path "/") +;; ==> (let ((th1 (make-thread (lambda () +;; ==> (let ((pid (process-run "rsync" (list "-av" (conc source-path "/") (conc targ-path "/"))))) +;; ==> (process-wait pid) +;; ==> (datashare:set-copied db id "yes") +;; ==> (sqlite3:finalize! db))) +;; ==> "Data copy"))) +;; ==> (thread-start! th1)) +;; ==> #t) +;; ==> (begin +;; ==> (print "ERROR: Not enough space in storage area " dest-path) +;; ==> (datashare:set-copied db id "no") +;; ==> (sqlite3:finalize! db) +;; ==> #f)))) +;; ==> +;; ==> (define (datashare:get-areas configdat) +;; ==> (let* ((areadat (configf:get-section configdat "areas")) +;; ==> (areas (if areadat (map car areadat) '()))) +;; ==> areas)) +;; ==> +;; ==> (define (datashare:publish configdat publish-type area-name version comment spath submitter quality) +;; ==> ;; input checks +;; ==> (cond +;; ==> ((not (member area-name (datashare:get-areas configdat))) +;; ==> (cons #f (conc "Illegal area name \"" area-name "\""))) +;; ==> (else +;; ==> (let ((db (datashare:open-db configdat)) +;; ==> (iteration (datashare:register-data db area-name version publish-type submitter quality spath comment)) +;; ==> (dest-store (datashare:get-best-storage configdat))) +;; ==> (if iteration +;; ==> (if (eq? 'copy publish-type) +;; ==> (begin +;; ==> (datashare:import-data configdat spath dest-store area-name version iteration) +;; ==> (let ((id (datashare:get-id db area-name version iteration))) +;; ==> (datashare:set-latest db id area-name version iteration))) +;; ==> (let ((id (datashare:get-id db area-name version iteration))) +;; ==> (datashare:set-stored-path db id spath) +;; ==> (datashare:set-copied db id "yes") +;; ==> (datashare:set-copied db id "n/a") +;; ==> (datashare:set-latest db id area-name version iteration))) +;; ==> (print "ERROR: Failed to get an iteration number")) +;; ==> (sqlite3:finalize! db) +;; ==> (cons #t "Successfully saved data"))))) +;; ==> +;; ==> (define (datashare:get-best-storage configdat) +;; ==> (let* ((storage (configf:lookup configdat "settings" "storage")) +;; ==> (store-areas (if storage (string-split storage) '()))) +;; ==> (print "Looking for available space in " store-areas) +;; ==> (datashare:find-most-space store-areas))) +;; ==> +;; ==> ;; (string->number (list-ref (with-input-from-pipe "df -B1000000 /tmp" (lambda ()(read-line)(string-split (read-line)))) 3)) +;; ==> +;; ==> (define (datashare:find-most-space paths) +;; ==> (fold (lambda (area res) +;; ==> ;; (print "area=" area " res=" res) +;; ==> (let ((maxspace (car res)) +;; ==> (currpath (cdr res))) +;; ==> ;; (print currpath " " maxspace) +;; ==> (if (file-write-access? area) +;; ==> (let ((currspace (string->number +;; ==> (list-ref +;; ==> (with-input-from-pipe +;; ==> ;; (conc "df --output=avail " area) +;; ==> (conc "df -B1000000 " area) +;; ==> ;; (lambda ()(read)(read)) +;; ==> (lambda ()(read-line)(string-split (read-line)))) +;; ==> 3)))) +;; ==> (if (> currspace maxspace) +;; ==> (cons currspace area) +;; ==> res)) +;; ==> res))) +;; ==> (cons 0 #f) +;; ==> paths)) +;; ==> +;; ==> ;; remove existing link and if possible ... +;; ==> ;; create path to next of tip of target, create link back to source +;; ==> (define (datashare:build-dir-make-link source target) +;; ==> (if (common:file-exists? target)(datashare:backup-move target)) +;; ==> (create-directory (pathname-directory target) #t) +;; ==> (create-symbolic-link source target)) +;; ==> +;; ==> (define (datashare:backup-move path) +;; ==> (let* ((trashdir (conc (pathname-directory path) "/.trash")) +;; ==> (trashfile (conc trashdir "/" (current-seconds) "-" (pathname-file path)))) +;; ==> (create-directory trashdir #t) +;; ==> (if (directory? path) +;; ==> (system (conc "mv " path " " trashfile)) +;; ==> (file-move path trash-file)))) +;; ==> +;; ==> ;;====================================================================== +;; ==> ;; GUI +;; ==> ;;====================================================================== +;; ==> +;; ==> ;; The main menu +;; ==> (define (datashare:main-menu) +;; ==> (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) +;; ==> (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options +;; ==> (iup:menu-item "Open" action: (lambda (obj) +;; ==> (iup:show (iup:file-dialog)) +;; ==> (print "File->open " obj))) +;; ==> (iup:menu-item "Save" #:action (lambda (obj)(print "File->save " obj))) +;; ==> (iup:menu-item "Exit" #:action (lambda (obj)(exit))))) +;; ==> (iup:menu-item "Tools" (iup:menu +;; ==> (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah"))) +;; ==> ;; (iup:menu-item "Show dialog" #:action (lambda (obj) +;; ==> ;; (show message-window +;; ==> ;; #:modal? #t +;; ==> ;; ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current +;; ==> ;; ;; #:x 'mouse +;; ==> ;; ;; #:y 'mouse +;; ==> ;; ) +;; ==> )))) +;; ==> +;; ==> (define (datashare:publish-view configdat) +;; ==> ;; (pp (hash-table->alist configdat)) +;; ==> (let* ((areas (configf:get-section configdat "areas")) +;; ==> (label-size "70x") +;; ==> (areas-sel (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES")) +;; ==> (version-tb (iup:textbox #:expand "HORIZONTAL")) ;; #:size "50x")) +;; ==> (areas-sel (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES")) +;; ==> (component (iup:listbox #:expand "HORIZONTAL" #:dropdown "YES" )) +;; ==> (version-val (iup:textbox #:expand "HORIZONTAL" #:size "50x")) +;; ==> ;; (copy-link (iup:toggle #:expand "HORIZONTAL")) +;; ==> ;; (iteration (iup:textbox #:expand "YES" #:size "20x")) +;; ==> ;; (iteration (iup:textbox #:expand "HORIZONTAL" #:size "20x")) +;; ==> (area-filter (iup:textbox #:expand "HORIZONTAL" #:value "%")) +;; ==> (comment-tb (iup:textbox #:expand "YES" #:multiline "YES")) +;; ==> (source-tb (iup:textbox #:expand "HORIZONTAL" +;; ==> #:value (or (configf:lookup configdat "settings" "basepath") +;; ==> ""))) +;; ==> (publish (lambda (publish-type) +;; ==> (let* ((area-num (or (string->number (iup:attribute areas-sel "VALUE")) 0)) +;; ==> (area-dat (if (> area-num 0)(list-ref areas (- area-num 1))'("NOT SELECTED" "NOT SELECTED"))) +;; ==> (area-path (cadr area-dat)) +;; ==> (area-name (car area-dat)) +;; ==> (version (iup:attribute version-tb "VALUE")) +;; ==> (comment (iup:attribute comment-tb "VALUE")) +;; ==> (spath (iup:attribute source-tb "VALUE")) +;; ==> (submitter (current-user-name)) +;; ==> (quality 2)) +;; ==> (datashare:publish configdat publish-type area-name version comment spath submitter quality)))) +;; ==> (copy (iup:button "Copy and Publish" +;; ==> #:expand "HORIZONTAL" +;; ==> #:action (lambda (obj) +;; ==> (publish 'copy)))) +;; ==> (link (iup:button "Link and Publish" +;; ==> #:expand "HORIZONTAL" +;; ==> #:action (lambda (obj) +;; ==> (publish 'link)))) +;; ==> (browse-btn (iup:button "Browse" +;; ==> #:size "40x" +;; ==> #:action (lambda (obj) +;; ==> (let* ((fd (iup:file-dialog #:dialogtype "DIR")) +;; ==> (top (iup:show fd #:modal? "YES"))) +;; ==> (iup:attribute-set! source-tb "VALUE" +;; ==> (iup:attribute fd "VALUE")) +;; ==> (iup:destroy! fd)))))) +;; ==> (print "areas") +;; ==> ;; (pp areas) +;; ==> (fold (lambda (areadat num) +;; ==> ;; (print "Adding num=" num ", areadat=" areadat) +;; ==> (iup:attribute-set! areas-sel (conc num) (car areadat)) +;; ==> (+ 1 num)) +;; ==> 1 areas) +;; ==> (iup:vbox +;; ==> (iup:hbox (iup:label "Area:" #:size label-size) ;; area-filter +;; ==> areas-sel) +;; ==> (iup:hbox (iup:label "Version:" #:size label-size) version-tb) +;; ==> ;; (iup:hbox (iup:label "Link only" #:size label-size) copy-link) +;; ==> ;; (iup:label "Iteration:") iteration) +;; ==> (iup:hbox (iup:label "Comment:" #:size label-size) comment-tb) +;; ==> (iup:hbox (iup:label "Source base path:" #:size label-size) source-tb browse-btn) +;; ==> (iup:hbox copy link)))) +;; ==> +;; ==> (define (datashare:lst->path pathlst) +;; ==> (conc "/" (string-intersperse (map conc pathlst) "/"))) +;; ==> +;; ==> (define (datashare:path->lst path) +;; ==> (string-split path "/")) +;; ==> +;; ==> (define (datashare:pathdat-apply-heuristics configdat path) +;; ==> (cond +;; ==> ((common:file-exists? path) "found") +;; ==> (else (conc path " not installed")))) +;; ==> +;; ==> (define (datashare:get-view configdat) +;; ==> (iup:vbox +;; ==> (iup:hbox +;; ==> (let* ((label-size "60x") +;; ==> ;; filter elements +;; ==> (area-filter "%") +;; ==> (version-filter "%") +;; ==> (iter-filter ">= 0") +;; ==> ;; reverse lookup from path to data for src and installed +;; ==> (srcdat (make-hash-table)) ;; reverse lookup +;; ==> (installed-dat (make-hash-table)) +;; ==> ;; config values +;; ==> (basepath (configf:lookup configdat "settings" "basepath")) +;; ==> ;; gui elements +;; ==> (submitter (iup:label "" #:expand "HORIZONTAL")) +;; ==> (date-submitted (iup:label "" #:expand "HORIZONTAL")) +;; ==> (comment (iup:label "" #:expand "HORIZONTAL")) +;; ==> (copy-link (iup:label "" #:expand "HORIZONTAL")) +;; ==> (quality (iup:label "" #:expand "HORIZONTAL")) +;; ==> (installed-status (iup:label "" #:expand "HORIZONTAL")) +;; ==> ;; misc +;; ==> (curr-record #f) +;; ==> ;; (source-data (iup:label "" #:expand "HORIZONTAL")) +;; ==> (tb (iup:treebox +;; ==> #:value 0 +;; ==> #:name "Packages" +;; ==> #:expand "YES" +;; ==> #:addexpanded "NO" +;; ==> #:selection-cb +;; ==> (lambda (obj id state) +;; ==> ;; (print "obj: " obj ", id: " id ", state: " state) +;; ==> (let* ((path (datashare:lst->path (cdr (tree:node->path obj id)))) +;; ==> (record (hash-table-ref/default srcdat path #f))) +;; ==> (if record +;; ==> (begin +;; ==> (set! curr-record record) +;; ==> (iup:attribute-set! submitter "TITLE" (datashare:pkg-get-submitter record)) +;; ==> (iup:attribute-set! date-submitted "TITLE" (time->string (seconds->local-time (datashare:pkg-get-datetime record)))) +;; ==> (iup:attribute-set! comment "TITLE" (datashare:pkg-get-comment record)) +;; ==> (iup:attribute-set! quality "TITLE" (datashare:pkg-get-quality record)) +;; ==> (iup:attribute-set! copy-link "TITLE" (datashare:pkg-get-store_type record)) +;; ==> )) +;; ==> ;; (print "id=" id " path=" path " record=" record);; (tree:node->path obj id) " run-id: " run-id) +;; ==> )))) +;; ==> (tb2 (iup:treebox +;; ==> #:value 0 +;; ==> #:name "Installed" +;; ==> #:expand "YES" +;; ==> #:addexpanded "NO" +;; ==> #:selection-cb +;; ==> (lambda (obj id state) +;; ==> ;; (print "obj: " obj ", id: " id ", state: " state) +;; ==> (let* ((path (datashare:lst->path (cdr (tree:node->path obj id)))) +;; ==> (status (hash-table-ref/default installed-dat path #f))) +;; ==> (iup:attribute-set! installed-status "TITLE" (if status status "")) +;; ==> )))) +;; ==> (refresh (lambda (obj) +;; ==> (let* ((db (datashare:open-db configdat)) +;; ==> (areas (or (configf:get-section configdat "areas") '()))) +;; ==> ;; +;; ==> ;; first update the Sources +;; ==> ;; +;; ==> (for-each +;; ==> (lambda (pkgitem) +;; ==> (let* ((pkg-path (list (datashare:pkg-get-area pkgitem) +;; ==> (datashare:pkg-get-version_name pkgitem) +;; ==> (datashare:pkg-get-iteration pkgitem))) +;; ==> (pkg-id (datashare:pkg-get-id pkgitem)) +;; ==> (path (datashare:lst->path pkg-path))) +;; ==> ;; (print "tree:add-node tb=" tb ", pkg-path=" pkg-path ", pkg-id=" pkg-id) +;; ==> (if (not (hash-table-ref/default srcdat path #f)) +;; ==> (tree:add-node tb "Packages" pkg-path userdata: (conc "pkg-id: " pkg-id))) +;; ==> ;; (print "path=" path " pkgitem=" pkgitem) +;; ==> (hash-table-set! srcdat path pkgitem))) +;; ==> (datashare:get-pkgs db area-filter version-filter iter-filter)) +;; ==> ;; +;; ==> ;; then update the installed +;; ==> ;; +;; ==> (for-each +;; ==> (lambda (area) +;; ==> (let* ((path (conc "/" (cadr area))) +;; ==> (fullpath (conc basepath path))) +;; ==> (if (not (hash-table-ref/default installed-dat path #f)) +;; ==> (tree:add-node tb2 "Installed" (datashare:path->lst path))) +;; ==> (hash-table-set! installed-dat path (datashare:pathdat-apply-heuristics configdat fullpath)))) +;; ==> areas) +;; ==> (sqlite3:finalize! db)))) +;; ==> (apply (iup:button "Apply" +;; ==> #:action +;; ==> (lambda (obj) +;; ==> (if curr-record +;; ==> (let* ((area (datashare:pkg-get-area curr-record)) +;; ==> (stored-path (datashare:pkg-get-stored_path curr-record)) +;; ==> (source-type (datashare:pkg-get-store_type curr-record)) +;; ==> (source-path (case source-type ;; (equal? source-type "link")) +;; ==> ((link)(datashare:pkg-get-source-path curr-record)) +;; ==> ((copy)stored-path) +;; ==> (else #f))) +;; ==> (dest-stub (configf:lookup configdat "areas" area)) +;; ==> (target-path (conc basepath "/" dest-stub))) +;; ==> (datashare:build-dir-make-link stored-path target-path) +;; ==> (print "Creating link from " stored-path " to " target-path))))))) +;; ==> (iup:vbox +;; ==> (iup:hbox tb tb2) +;; ==> (iup:frame +;; ==> #:title "Source Info" +;; ==> (iup:vbox +;; ==> (iup:hbox (iup:button "Refresh" #:action refresh) apply) +;; ==> (iup:hbox (iup:label "Submitter: ") ;; #:size label-size) +;; ==> submitter +;; ==> (iup:label "Submitted on: ") ;; #:size label-size) +;; ==> date-submitted) +;; ==> (iup:hbox (iup:label "Data stored: ") +;; ==> copy-link +;; ==> (iup:label "Quality: ") +;; ==> quality) +;; ==> (iup:hbox (iup:label "Comment: ") +;; ==> comment))) +;; ==> (iup:frame +;; ==> #:title "Installed Info" +;; ==> (iup:vbox +;; ==> (iup:hbox (iup:label "Installed status/path: ") installed-status))) +;; ==> ))))) +;; ==> +;; ==> (define (datashare:manage-view configdat) +;; ==> (iup:vbox +;; ==> (iup:hbox +;; ==> (iup:button "Pushme" +;; ==> #:expand "YES" +;; ==> )))) +;; ==> +;; ==> (define (datashare:gui configdat) +;; ==> (iup:show +;; ==> (iup:dialog +;; ==> #:title (conc "DataShare dashboard " (current-user-name) ":" (current-directory)) +;; ==> #:menu (datashare:main-menu) +;; ==> (let* ((tabs (iup:tabs +;; ==> #:tabchangepos-cb (lambda (obj curr prev) +;; ==> (set! *datashare:current-tab-number* curr)) +;; ==> (datashare:publish-view configdat) +;; ==> (datashare:get-view configdat) +;; ==> (datashare:manage-view configdat) +;; ==> ))) +;; ==> ;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c))) +;; ==> (iup:attribute-set! tabs "TABTITLE0" "Publish") +;; ==> (iup:attribute-set! tabs "TABTITLE1" "Get") +;; ==> (iup:attribute-set! tabs "TABTITLE2" "Manage") +;; ==> ;; (iup:attribute-set! tabs "BGCOLOR" "190 190 190") +;; ==> tabs))) +;; ==> (iup:main-loop)) +;; ==> +;; ==> ;;====================================================================== +;; ==> ;; MISC +;; ==> ;;====================================================================== +;; ==> +;; ==> +;; ==> (define (datashare:do-as-calling-user proc) +;; ==> (let ((eid (current-effective-user-id)) +;; ==> (cid (current-user-id))) +;; ==> (if (not (eq? eid cid)) ;; running suid +;; ==> (set! (current-effective-user-id) cid)) +;; ==> ;; (print "running as " (current-effective-user-id)) +;; ==> (proc) +;; ==> (if (not (eq? eid cid)) +;; ==> (set! (current-effective-user-id) eid)))) +;; ==> +;; ==> (define (datashare:find name paths) +;; ==> (if (null? paths) +;; ==> #f +;; ==> (let loop ((hed (car paths)) +;; ==> (tal (cdr paths))) +;; ==> (if (common:file-exists? (conc hed "/" name)) +;; ==> hed +;; ==> (if (null? tal) +;; ==> #f +;; ==> (loop (car tal)(cdr tal))))))) +;; ==> +;; ==> ;;====================================================================== +;; ==> ;; MAIN +;; ==> ;;====================================================================== +;; ==> +;; ==> (define (datashare:load-config exe-dir exe-name) +;; ==> (let* ((fname (conc exe-dir "/." exe-name ".config"))) +;; ==> (ini:property-separator-patt " * *") +;; ==> (ini:property-separator #\space) +;; ==> (if (common:file-exists? fname) +;; ==> ;; (ini:read-ini fname) +;; ==> (read-config fname #f #t) +;; ==> (make-hash-table)))) +;; ==> +;; ==> (define (datashare:process-action configdat action . args) +;; ==> (case (string->symbol action) +;; ==> ((get) +;; ==> (if (< (length args) 2) +;; ==> (begin +;; ==> (print "ERROR: Missing arguments; " (string-intersperse args ", ")) +;; ==> (exit 1)) +;; ==> (let* ((basepath (configf:lookup configdat "settings" "basepath")) +;; ==> (db (datashare:open-db configdat)) +;; ==> (area (car args)) +;; ==> (version (cadr args)) ;; iteration +;; ==> (remargs (args:get-args args '("-i") '() args:arg-hash 0)) +;; ==> (iteration (if (args:get-arg "-i")(string->number (args:get-arg "-i")) #f)) +;; ==> (curr-record (datashare:get-pkg db area version iteration: iteration))) +;; ==> (if (not curr-record) +;; ==> (begin +;; ==> (print "ERROR: No matching record found; area=" area ", version=" version ", iteration=" (if iteration iteration "(max)")) +;; ==> (exit 1)) +;; ==> (let* ((stored-path (datashare:pkg-get-stored_path curr-record)) +;; ==> (source-type (datashare:pkg-get-store_type curr-record)) +;; ==> (source-path (case source-type ;; (equal? source-type "link")) +;; ==> ((link) (datashare:pkg-get-source-path curr-record)) +;; ==> ((copy) stored-path) +;; ==> (else #f))) +;; ==> (dest-stub (configf:lookup configdat "areas" area)) +;; ==> (target-path (conc basepath "/" dest-stub))) +;; ==> (datashare:build-dir-make-link stored-path target-path) +;; ==> (datashare:record-pkg-ref db (datashare:pkg-get-id curr-record) target-path) +;; ==> (sqlite3:finalize! db) +;; ==> (print "Creating link from " stored-path " to " target-path)))))) +;; ==> ((publish) +;; ==> (if (< (length args) 3) +;; ==> (begin +;; ==> (print "ERROR: Missing arguments; " (string-intersperse args ", ")) +;; ==> (exit 1)) +;; ==> (let* ((srcpath (list-ref args 0)) +;; ==> (areaname (list-ref args 1)) +;; ==> (version (list-ref args 2)) +;; ==> (remargs (args:get-args (drop args 2) +;; ==> '("-type" ;; link or copy (default is copy) +;; ==> "-m") +;; ==> '() +;; ==> args:arg-hash +;; ==> 0)) +;; ==> (publish-type (if (equal? (args:get-arg "-type") "link") 'link 'copy)) +;; ==> (comment (or (args:get-arg "-m") "")) +;; ==> (submitter (current-user-name)) +;; ==> (quality (args:get-arg "-quality")) +;; ==> (publish-res (datashare:publish configdat publish-type areaname version comment srcpath submitter quality))) +;; ==> (if (not (car publish-res)) +;; ==> (begin +;; ==> (print "ERROR: " (cdr publish-res)) +;; ==> (exit 1)))))) +;; ==> ((list-versions) +;; ==> (let ((area-name (car args)) ;; version patt full print +;; ==> (remargs (args:get-args args '("-vpatt") '("-full") args:arg-hash 0)) +;; ==> (db (datashare:open-db configdat)) +;; ==> (versions (datashare:get-versions-for-area db (car args) version-patt: (args:get-arg "-vpatt")))) +;; ==> ;; (print "area-name=" area-name " args=" args " *args-hash*=" (hash-table->alist *args-hash*)) +;; ==> (map (lambda (x) +;; ==> (if (args:get-arg "-full") +;; ==> (format #t +;; ==> "~10a~10a~4a~27a~30a\n" +;; ==> (vector-ref x 0) +;; ==> (vector-ref x 1) +;; ==> (vector-ref x 2) +;; ==> (conc "\"" (time->string (seconds->local-time (vector-ref x 3))) "\"") +;; ==> (conc "\"" (vector-ref x 4) "\"")) +;; ==> (print (vector-ref x 0)))) +;; ==> versions) +;; ==> (sqlite3:finalize! db))))) +;; ==> +;; ==> ;; ease debugging by loading ~/.dashboardrc - REMOVE FROM PRODUCTION! +;; ==> (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.datasharerc"))) +;; ==> (if (common:file-exists? debugcontrolf) +;; ==> (load debugcontrolf))) +;; ==> +;; ==> (define (main) +;; ==> (let* ((args (argv)) +;; ==> (prog (car args)) +;; ==> (rema (cdr args)) +;; ==> (exe-name (pathname-file (car (argv)))) +;; ==> (exe-dir (or (pathname-directory prog) +;; ==> (datashare:find exe-name (string-split (get-environment-variable "PATH") ":")))) +;; ==> (configdat (datashare:load-config exe-dir exe-name))) +;; ==> (cond +;; ==> ;; one-word commands +;; ==> ((eq? (length rema) 1) +;; ==> (case (string->symbol (car rema)) +;; ==> ((help -h -help --h --help) +;; ==> (print datashare:help)) +;; ==> ((list-areas) +;; ==> (map print (datashare:get-areas configdat))) +;; ==> (else +;; ==> (print "ERROR: Unrecognised command. Try \"datashare help\"")))) +;; ==> ;; multi-word commands +;; ==> ((null? rema)(datashare:gui configdat)) +;; ==> ((>= (length rema) 2) +;; ==> (apply datashare:process-action configdat (car rema)(cdr rema))) +;; ==> (else (print "ERROR: Unrecognised command. Try \"datashare help\""))))) +;; ==> +;; ==> (main) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -4659,11 +4659,11 @@ ;; the new prereqs calculation, looks also at itempath if specified ;; all prereqs must be met -;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met +;; if prereq test with itempath='' is in common:well-ended-states, then prereq is met ;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met ;; ;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) ;; mode 'toplevel means that tests must be COMPLETED only ;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] @@ -4674,10 +4674,11 @@ ;; 2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list ;; ;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) (define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items + (debug:print 4 *default-log-port* "db:get-prereqs-not-met: " waitons) (append (if (member 'exclusive mode) (let ((running-tests (db:get-tests-for-run dbstruct #f ;; run-id of #f means for all runs. (if (string=? ref-item-path "") ;; testpatt @@ -4700,10 +4701,12 @@ ;; (conc (db:test-get-testname testdat) ;; "/" ;; (db:test-get-item-path testdat)))) running-tests) ;; calling functions want the entire data '()) + + ;; collection of: for each waiton - ;; if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch: ;; if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite ;; if waiton is itemized: Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -22,18 +22,16 @@ (module dbmod * (import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) - -(define (just-testing) - (print "JUST TESTING")) - -;; (define (debug:print . params) #f) -;; (define (debug:print-info . params) #f) -;; -;; (define (set-functions dbgp dbgpinfo) -;; (set! debug:print dbgp) -;; (set! debug:print-info dbgpinfo)) - +(import (prefix sqlite3 sqlite3:) + posix typed-records srfi-18 + srfi-69) + +(define (db:run-id->dbname run-id) + (cond + ((number? run-id)(conc run-id ".db")) + ((not run-id) "main.db") + (else run-id))) + ) Index: docs/manual/Makefile ================================================================== --- docs/manual/Makefile +++ docs/manual/Makefile @@ -30,15 +30,15 @@ # asciidoc -b html5 -a icons -a iconsdir=$(DISPATH)/images/icons -a toc2 design_spec.txt # all : server.ps megatest_manual.html client.ps complex-itemmap.png megatest_manual.pdf -megatest_manual.html : megatest_manual.txt *.txt installation.txt *png +megatest_manual.html : megatest_manual.txt *.txt installation.txt *png *.dot asciidoc -b html5 -a icons -a iconsdir=$(DISPATH)/images/icons -a toc2 megatest_manual.txt # dos2unix megatest_manual.html -megatest_manual.pdf : megatest_manual.txt *.txt *png +megatest_manual.pdf : megatest_manual.txt *.txt *png *.dot a2x -a toc -f pdf megatest_manual.txt server.ps : server.dot dot -Tps server.dot > server.ps ADDED docs/manual/bisecting.dot Index: docs/manual/bisecting.dot ================================================================== --- /dev/null +++ docs/manual/bisecting.dot @@ -0,0 +1,31 @@ +// Copyright 2021, Matthew Welland. +// +// This file is part of Megatest. +// +// Megatest is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Megatest is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Megatest. If not, see . +// +digraph G { + rankdir=LR + subgraph cluster_1 { + node [style=filled,shape=box]; + + B [label="B\nProblem is here"]; + E [label="E\nProblem manifests here"]; + A -> B; + B -> C; + C -> D; + D -> E; + } + +} ADDED docs/manual/bisecting.png Index: docs/manual/bisecting.png ================================================================== --- /dev/null +++ docs/manual/bisecting.png cannot compute difference between binary files ADDED docs/manual/debugging.txt Index: docs/manual/debugging.txt ================================================================== --- /dev/null +++ docs/manual/debugging.txt @@ -0,0 +1,314 @@ +// Copyright 2021, Matthew Welland. +// +// This file is part of Megatest. +// +// Megatest is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Megatest is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Megatest. If not, see . + +Debugging +--------- + +Well Written Tests +~~~~~~~~~~~~~~~~~~ + +Test Design and Surfacing Errors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Design your tests to surface errors. Ensure that all logs are +processed by logpro (or a custom log processing tool) and can be +reached by a mouse click or two from the test control panel. + +To illustrate, here is a set of scripts with nested calls where script1.sh calls script2.sh which calls script3.sh which finally calls the Cadence EDA tool virtuoso: + +.script1.sh +.............................. +#!/bin/bash +code ... +script2.sh some parameters > script2.log +more code ... +.............................. + +.script2.sh +.............................. +#!/bin/bash +code ... +script3.sh some more parameters > script3.log +more code ... +.............................. + +.script3.sh +.............................. +#!/bin/bash +code ... +virtuoso params and switches ... +more code ... +.............................. + +The log files script2.log, script3.log and the log output from +virtuoso are not accessible from the test control panel. It would be +much better for future users of your automation to use steps more +fully. One easy option would be to post process the logs in downstream +additional steps: + +.testconfig +.............................. +[ezsteps] +step1 script1.sh +step2 cat script2.log +step3 cat script3.log + +[logpro] +step1 ;; some logpro rules + (expect:required in "LogFileBody" > 0 "Expect this output" #/something expected/) +step2 ;; some logpro rules for script2.sh +step3 ;; some logpro rules for script3.sh + +[scripts] +script1.sh #!/bin/bash + code ... + +... +.............................. + +With the above testconfig the logs for every critical part of the +automation are fully surfaced and rules can be created to flag errors, +warnings, aborts and to ignore false errors. A user of your automation +will be able to see the important error with two mouse clicks from the +runs view. + +An even better would be to eliminate the nesting if possible. As a +general statement with layers - less is usually more. By flattening +the automation into a sequence of steps you can use the test control +panel to re-run a step with a single click or from the test xterm run +only the errant step from the command line. + +The message here is make debugging and maintenace easy for future +users (and yourself) by keeping clicks-to-error in mind. + +Examining The Test Logs and Environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Test Control Panel - xterm +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +From the dashboard click on a test PASS/FAIL button. This brings up a +test control panel. Aproximately near the center left of the window +there is a button "Start Xterm". Push this to get an xterm with the +full context and environment loaded for that test. You can run scripts +or ezsteps by copying from the testconfig (hint, load up the +testconfig in a separate text editor window). + +With more recent versions of Megatest you can step through your test +from the test control panel. Click on the cell labeled "rerun this +step" to only rerun the step or click on "restart from here" to rerun +that step and downstream steps. + +NOTE 1: visual feedback can take some time, give it a few seconds and +you will see the step change color to blue as it starts running. + +NOTE 2: steping through only works if you are using ezsteps. + +A word on Bisecting +~~~~~~~~~~~~~~~~~~~ + +Bisecting is a debug strategy intended to speed up finding the root +cause of some bug. + +.A complex process with a problem found in stage "E" +["graphviz", "bisecting.png"] +---------------------------------------------------------------------- +include::bisecting.dot[] +---------------------------------------------------------------------- + +It is common to start debugging where the problem was observed and +then work back. However by inspecting the output at stage "C" in the +example above you would potentially save a lot of debug effort, this +is similar to the feature in source control tools like git and fossil +called biseceting. + +Tough Bugs +~~~~~~~~~~ + +Most bugs in Megatest based automation will be in the scripts called +in your test steps and if you utilize the good design practice +described above should be fairly easy for you to reproduce, isolate +and find. + +Some bugs however will come from subtle and hard to detect +interactions between Megatest and your OS and Unix environment. This +includes things like constructed variables that are legal in one +context (e.g. tcsh) but illegal in another context (e.g. bash), +variables that come from your login scripts and access and permissions +issues (e.g. a script that silently fails due to no access to needed +data). Other bugs might be due to Megatest itself. + +To isolate bugs like this you may need to look at the log files at +various stages in the execution process of your run and tests. + +.A simplified diagram of the stages Megatest goes through to run a test. +["graphviz", "megatest-test-stages.png"] +---------------------------------------------------------------------- +include::megatest-test-stages.dot[] +---------------------------------------------------------------------- + +.How to check variable values and inspect logs at each stage +[width="80%",cols="<,2m,2m",frame="topbot",options="header"] +|====================== +|Stage | How to inspect | Watch for or try ... +|A: post config processing | megatest -show-config -target your/target | #f (failed var processing) +|B: post runconfig | megatest -show-runconfig -target your/target | Add -debug 0,9 to see which file your settings come from +|C: processing testconfigs | inspect output from "megatest -run ..." | Messages indicating issues process configs, dependency problems +|D: process testconfig for test launch | inspect output from megatest runner | Zero items (items expansion yielded no items) +|E,F: launching test | start test xterm, look at mt_launch.log | Did your batch system accept the job? Has the job landed on a machine? +|G: starting test | look at your batch systems logs for the process | Did the megatest -execute process start and run? Extract the "megatest -execute ..." command and run it from your xterm. +|H,H1,H2: step exectution | look at .log, .html and your own internal logs | Do you have sufficiently tight logpro rules? You must always have a "required" rule! +|====================== + +Bisecting megatest.csh/sh +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometimes finding the environment variable that is causing the problem +can be very difficult. Bisection can be applied. + +Edit the megatest.csh or megatest.sh file and comment out 50% per +round, source in fresh xterm and run the test. + +This idea can also be applied to your .cshrc, .bashrc, .aliases and +other similar files. + +csh and -f +^^^^^^^^^^ + +A common issue when tcsh or csh shells are used for scripting is to +forget or choose to not use -f in your #! line. + +.Not good +.............................. +#!/bin/tcsh +... +.............................. + +.Good +.............................. +#!/bin/tcsh -f +... +.............................. + + +Config File Processing +^^^^^^^^^^^^^^^^^^^^^^ + +As described above it is often helpful to know the content of +variables in various contexts as Megatest works through the actions +needed to run your tests. A handy technique is to force the startup of +an xterm in the context being examined. + +For example, if an item list is not being generated as expected you +can inject the startup of an xterm as if it were an item: + +.Original items table +----------------- +[items] +CELLNAME [system getcellname.sh] +----------------- + +.Items table modified for debug +----------------- +[items] +DEBUG [system xterm] +CELLNAME [system getcellnames.sh] +----------------- + +When this test is run an xterm will pop up. In that xterm the +environment is exactly that in which the script "getcellnames.sh" +would run. You can now debug the script to find out why it isn't +working as expected. + +Similarly in a script just call the xterm. +NOTE: This technique can be very helpful in debugging running of EDA tools in Perl, Ruby, Python or tcl scripts: + +.Perl example +.............................. +some_code(); +$cmdline="virtuoso -some-switches and params ..."; +print "$cmdline"; # print the command line so you can paste it into the xterm that pops up +system("xterm"); # this line is added for the debug and removed when done +system($cmdline); +more_code(); +.............................. + +Misc Other Debugging Hints +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Annotating scripts and config files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Run the "env" command to record the environment: + +env | sort > stagename.log + +In a config file: + +.megatest.config, runconfigs.config and testconfig +................... +#{shell env | sort > stagename.log} + +# or + +[system env | sort > stagename.log] +................... + +In scripts just insert the commands, this example helps you identify +if "some commands ..." changed any environment variables.: + +.myscript.sh +.............................. +env | sort > somefile-before.log +some commands ... +env | sort > somefile-after.log +.............................. + +.Use meld to examine the differences +.............................. +meld somefile-before.log somefile-after.log +.............................. + +Oneshot Modifying a Variable +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To try various values for a variable without mutating the current value + +.within a bash shell +.............................. +SOMEVAR=123 runcmd.sh +.............................. + +.within csh +.............................. +(setenv SOMEVAR 123;runcmd.sh) + +# OR + +env SOMEVAR=123 runcmd.sh +.............................. + +Capturing output from a command +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.Use the "script" utility +.............................. +script -c "virtuoso -params and switches ..." +.............................. + + + Index: docs/manual/howto.txt ================================================================== --- docs/manual/howto.txt +++ docs/manual/howto.txt @@ -110,15 +110,15 @@ # if defined and not "no" flexi-launcher will bypass launcher unless there is no # match. flexi-launcher yes ------------------------ -Tricks ------- +Tricks and Tips +--------------- -This section is a compendium of a various useful tricks for debugging, -configuring and generally getting the most out of Megatest. +This section is a collection of a various useful tricks for that +didn't quite fit elsewhere. Limiting your running jobs ~~~~~~~~~~~~~~~~~~~~~~~~~~ The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously. @@ -136,50 +136,10 @@ [jobgroups] group1 10 custdes 4 --------------- -Debugging Tricks ----------------- - -Examining The Environment -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Test Control Panel - xterm -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -From the dashboard click on a test PASS/FAIL button. This brings up a test control panel. Aproximately near the center left of the -window there is a button "Start Xterm". Push this to get an xterm with the full context and environment loaded for that test. You can run -scripts or ezsteps by copying from the testconfig (hint, load up the testconfig in a separate gvim or emacs window). This is the easiest way -to debug your tests. - -During Config File Processing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It is often helpful to know the content of variables in various -contexts as Megatest does the actions needed to run your tests. A handy technique is to force the startup of an xterm in the context being examined. - -For example, if an item list is not being generated as expected you -can inject the startup of an xterm as if it were an item: - -.Original items table ------------------ -[items] -CELLNAME [system getcellname.sh] ------------------ - -.Items table modified for debug ------------------ -[items] -DEBUG [system xterm] -CELLNAME [system getcellnames.sh] ------------------ - -When this test is run an xterm will pop up. In that xterm the -environment is exactly that in which the script "getcellnames.sh" -would run. You can now debug the script to find out why it isn't -working as expected. Organising Your Tests and Tasks ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The default location "tests" for storing tests can be extended by ADDED docs/manual/megatest-test-stages.dot Index: docs/manual/megatest-test-stages.dot ================================================================== --- /dev/null +++ docs/manual/megatest-test-stages.dot @@ -0,0 +1,42 @@ +// Copyright 2021, Matthew Welland. +// +// This file is part of Megatest. +// +// Megatest is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Megatest is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Megatest. If not, see . +// +digraph G { + // rankdir=LR + subgraph cluster_1 { + node [style=filled,shape=box]; + A [label="A: Process megatest.config"] + B [label="B: Process runconfig.config"] + A -> B [label="resolve vars"] + B2 [label="B2: Resolve variables"] + B -> B2 -> A + C [label="C: Process testconfigs (find tests to run)"] + D [label="D: Process testconfig for test of interest"] + E [label="E: Set vars for launching test"] + F [label="F: Launch into batch system, ssh, batch system\nand different hosts can all \nimpact variable values"] + G [label="G: Process testconfig again with all available variables"] + H [label="H: Start test"] + H1 [label="H1: Start step"] + H2 [label="H2: Execute step script"] + I [label="I: End step, process logfile with logpro"] + I -> H [label="Run remaining steps"] + H-> H1 -> H2 -> I + B->C->D->E->F->G->H + I->D + } + +} ADDED docs/manual/megatest-test-stages.png Index: docs/manual/megatest-test-stages.png ================================================================== --- /dev/null +++ docs/manual/megatest-test-stages.png cannot compute difference between binary files Index: docs/manual/megatest_manual.html ================================================================== --- docs/manual/megatest_manual.html +++ docs/manual/megatest_manual.html @@ -773,13 +773,13 @@

The Megatest Users Manual

Matt Welland
<matt@kiatoa.com>
version 1.5, June 2020 -
-
Table of Contents
- +
+
Table of Contents
+

Preface

@@ -808,22 +808,17 @@

Why Megatest?

-

The Megatest project was started for two reasons, the first was an -immediate and pressing need for a generalized tool to manage a suite -of regression tests and the second was the fact that I had written or -maintained several such tools at different companies over the years. I -thought a single open source tool, flexible enough to meet the needs -of any team doing continuous integration and or running a complex -suite of tests for release qualification would solve some problems for -me and for others.

-
-
-
-- Matt Welland, original author of the Megatest tool suite.
-
+

Megatest was created to provide a generalized tool for managing suites +of regression tests and to provide a multi-host, distributed +alternative to "make". The EDA world is littered with proprietory, +company-specific tools for this purpose and by going open source and +keeping the tool flexible the hope is that Megatest could be useful to +any team at any company for continuous integration and almost any +other general automation tasks.

Megatest Design Philosophy

@@ -959,15 +954,10 @@ Static
- -
-

Road Map

-
-

TODO / Road Map

Note: This road-map is a wish list and not a formal plan. Items are in @@ -1941,10 +1931,310 @@

This test runs a single step called "stepname1" which runs a script "stepname.sh". Note that although it is common to put the actions needed for a test step into a script it is not necessary.

+
+
+

Debugging

+
+
+

Well Written Tests

+
+

Test Design and Surfacing Errors

+

Design your tests to surface errors. Ensure that all logs are +processed by logpro (or a custom log processing tool) and can be +reached by a mouse click or two from the test control panel.

+

To illustrate, here is a set of scripts with nested calls where script1.sh calls script2.sh which calls script3.sh which finally calls the Cadence EDA tool virtuoso:

+
+
script1.sh
+
+
#!/bin/bash
+code ...
+script2.sh some parameters > script2.log
+more code ...
+
+
+
script2.sh
+
+
#!/bin/bash
+code ...
+script3.sh some more parameters > script3.log
+more code ...
+
+
+
script3.sh
+
+
#!/bin/bash
+code ...
+virtuoso params and switches ...
+more code ...
+
+

The log files script2.log, script3.log and the log output from +virtuoso are not accessible from the test control panel. It would be +much better for future users of your automation to use steps more +fully. One easy option would be to post process the logs in downstream +additional steps:

+
+
testconfig
+
+
[ezsteps]
+step1 script1.sh
+step2 cat script2.log
+step3 cat script3.log
+
+[logpro]
+step1 ;; some logpro rules
+  (expect:required in "LogFileBody" > 0 "Expect this output" #/something expected/)
+step2 ;; some logpro rules for script2.sh
+step3 ;; some logpro rules for script3.sh
+
+[scripts]
+script1.sh #!/bin/bash
+ code ...
+
+...
+
+

With the above testconfig the logs for every critical part of the +automation are fully surfaced and rules can be created to flag errors, +warnings, aborts and to ignore false errors. A user of your automation +will be able to see the important error with two mouse clicks from the +runs view.

+

An even better would be to eliminate the nesting if possible. As a +general statement with layers - less is usually more. By flattening +the automation into a sequence of steps you can use the test control +panel to re-run a step with a single click or from the test xterm run +only the errant step from the command line.

+

The message here is make debugging and maintenace easy for future +users (and yourself) by keeping clicks-to-error in mind.

+
+
+
+

Examining The Test Logs and Environment

+
+

Test Control Panel - xterm

+

From the dashboard click on a test PASS/FAIL button. This brings up a +test control panel. Aproximately near the center left of the window +there is a button "Start Xterm". Push this to get an xterm with the +full context and environment loaded for that test. You can run scripts +or ezsteps by copying from the testconfig (hint, load up the +testconfig in a separate text editor window).

+

With more recent versions of Megatest you can step through your test +from the test control panel. Click on the cell labeled "rerun this +step" to only rerun the step or click on "restart from here" to rerun +that step and downstream steps.

+

NOTE 1: visual feedback can take some time, give it a few seconds and +you will see the step change color to blue as it starts running.

+

NOTE 2: steping through only works if you are using ezsteps.

+
+
+
+

A word on Bisecting

+

Bisecting is a debug strategy intended to speed up finding the root +cause of some bug.

+
+
+bisecting.png +
+
Figure 1. A complex process with a problem found in stage "E"
+
+

It is common to start debugging where the problem was observed and +then work back. However by inspecting the output at stage "C" in the +example above you would potentially save a lot of debug effort, this +is similar to the feature in source control tools like git and fossil +called biseceting.

+
+
+

Tough Bugs

+

Most bugs in Megatest based automation will be in the scripts called +in your test steps and if you utilize the good design practice +described above should be fairly easy for you to reproduce, isolate +and find.

+

Some bugs however will come from subtle and hard to detect +interactions between Megatest and your OS and Unix environment. This +includes things like constructed variables that are legal in one +context (e.g. tcsh) but illegal in another context (e.g. bash), +variables that come from your login scripts and access and permissions +issues (e.g. a script that silently fails due to no access to needed +data). Other bugs might be due to Megatest itself.

+

To isolate bugs like this you may need to look at the log files at +various stages in the execution process of your run and tests.

+
+
+megatest-test-stages.png +
+
Figure 2. A simplified diagram of the stages Megatest goes through to run a test.
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2. How to check variable values and inspect logs at each stage
Stage How to inspect Watch for or try …

A: post config processing

megatest -show-config -target your/target

#f (failed var processing)

B: post runconfig

megatest -show-runconfig -target your/target

Add -debug 0,9 to see which file your settings come from

C: processing testconfigs

inspect output from "megatest -run …"

Messages indicating issues process configs, dependency problems

D: process testconfig for test launch

inspect output from megatest runner

Zero items (items expansion yielded no items)

E,F: launching test

start test xterm, look at mt_launch.log

Did your batch system accept the job? Has the job landed on a machine?

G: starting test

look at your batch systems logs for the process

Did the megatest -execute process start and run? Extract the "megatest -execute …" command and run it from your xterm.

H,H1,H2: step exectution

look at <stepname>.log, <stepname>.html and your own internal logs

Do you have sufficiently tight logpro rules? You must always have a "required" rule!

+
+

Bisecting megatest.csh/sh

+

Sometimes finding the environment variable that is causing the problem +can be very difficult. Bisection can be applied.

+

Edit the megatest.csh or megatest.sh file and comment out 50% per +round, source in fresh xterm and run the test.

+

This idea can also be applied to your .cshrc, .bashrc, .aliases and +other similar files.

+
+
+

csh and -f

+

A common issue when tcsh or csh shells are used for scripting is to +forget or choose to not use -f in your #! line.

+
+
Not good
+
+
#!/bin/tcsh
+...
+
+
+
Good
+
+
#!/bin/tcsh -f
+...
+
+
+
+

Config File Processing

+

As described above it is often helpful to know the content of +variables in various contexts as Megatest works through the actions +needed to run your tests. A handy technique is to force the startup of +an xterm in the context being examined.

+

For example, if an item list is not being generated as expected you +can inject the startup of an xterm as if it were an item:

+
+
Original items table
+
+
[items]
+CELLNAME [system getcellname.sh]
+
+
+
Items table modified for debug
+
+
[items]
+DEBUG [system xterm]
+CELLNAME [system getcellnames.sh]
+
+

When this test is run an xterm will pop up. In that xterm the +environment is exactly that in which the script "getcellnames.sh" +would run. You can now debug the script to find out why it isn’t +working as expected.

+

Similarly in a script just call the xterm. +NOTE: This technique can be very helpful in debugging running of EDA tools in Perl, Ruby, Python or tcl scripts:

+
+
Perl example
+
+
some_code();
+$cmdline="virtuoso -some-switches and params ...";
+print "$cmdline"; # print the command line so you can paste it into the xterm that pops up
+system("xterm");  # this line is added for the debug and removed when done
+system($cmdline);
+more_code();
+
+
+
+
+

Misc Other Debugging Hints

+
+

Annotating scripts and config files

+

Run the "env" command to record the environment:

+

env | sort > stagename.log

+

In a config file:

+
+
megatest.config, runconfigs.config and testconfig
+
+
#{shell env | sort > stagename.log}
+
+# or
+
+[system env | sort > stagename.log]
+
+

In scripts just insert the commands, this example helps you identify +if "some commands …" changed any environment variables.:

+
+
myscript.sh
+
+
env | sort > somefile-before.log
+some commands ...
+env | sort > somefile-after.log
+
+
+
Use meld to examine the differences
+
+
meld somefile-before.log somefile-after.log
+
+
+
+

Oneshot Modifying a Variable

+

To try various values for a variable without mutating the current value

+
+
within a bash shell
+
+
SOMEVAR=123 runcmd.sh
+
+
+
within csh
+
+
(setenv SOMEVAR 123;runcmd.sh)
+
+# OR
+
+env SOMEVAR=123 runcmd.sh
+
+
+
+

How To Do Things

@@ -2036,14 +2326,14 @@
-

Tricks

+

Tricks and Tips

-

This section is a compendium of a various useful tricks for debugging, -configuring and generally getting the most out of Megatest.

+

This section is a collection of a various useful tricks for that +didn’t quite fit elsewhere.

Limiting your running jobs

The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously.

In your testconfig:

@@ -2056,49 +2346,10 @@
[jobgroups]
 group1 10
 custdes 4
-
-
-
-
-

Debugging Tricks

-
-
-

Examining The Environment

-
-

Test Control Panel - xterm

-

From the dashboard click on a test PASS/FAIL button. This brings up a test control panel. Aproximately near the center left of the -window there is a button "Start Xterm". Push this to get an xterm with the full context and environment loaded for that test. You can run -scripts or ezsteps by copying from the testconfig (hint, load up the testconfig in a separate gvim or emacs window). This is the easiest way -to debug your tests.

-
-
-

During Config File Processing

-

It is often helpful to know the content of variables in various -contexts as Megatest does the actions needed to run your tests. A handy technique is to force the startup of an xterm in the context being examined.

-

For example, if an item list is not being generated as expected you -can inject the startup of an xterm as if it were an item:

-
-
Original items table
-
-
[items]
-CELLNAME [system getcellname.sh]
-
-
-
Items table modified for debug
-
-
[items]
-DEBUG [system xterm]
-CELLNAME [system getcellnames.sh]
-
-

When this test is run an xterm will pop up. In that xterm the -environment is exactly that in which the script "getcellnames.sh" -would run. You can now debug the script to find out why it isn’t -working as expected.

-

Organising Your Tests and Tasks

The default location "tests" for storing tests can be extended by adding to your tests-paths section.

@@ -2148,11 +2399,11 @@

Megatest Use Modes

- + @@ -2210,11 +2461,11 @@

Various helpers for more advanced config files.

Table 2. Base commandsTable 3. Base commands
- + @@ -2495,11 +2746,11 @@

Database settings

Table 3. HelpersTable 4. Helpers
- + @@ -3008,10 +3259,14 @@ lookittmp ;; Note: config file format supports multi-line entries where leading whitespace is removed from each line ;; a blank line indicates the end of the block of text (expect:required in "LogFileBody" > 0 "A file name that should never exist!" #/This is a awfully stupid file name that should never be found in the temp dir/)

To transfer the environment to the next step you can do the following:

+# if your upstream file is csh you can force csh like this +# if your upstream is bash +loadenv source $REF/ourenviron.sh +
Propagate environment to next step
$MT_MEGATEST -env2file .ezsteps/${stepname}
@@ -3160,11 +3415,11 @@ fail gracefully if it doesn’t exist.

Table 4. Database config settings in [setup] section of megatest.configTable 5. Database config settings in [setup] section of megatest.config
- + @@ -3394,11 +3649,11 @@

These routines can be called from the megatest repl.

Table 5. Environment variables visible to the trigger scriptTable 6. Environment variables visible to the trigger script
Variable
- + ADDED docs/manual/megatest_manual.pdf Index: docs/manual/megatest_manual.pdf ================================================================== --- /dev/null +++ docs/manual/megatest_manual.pdf cannot compute difference between binary files Index: docs/manual/megatest_manual.txt ================================================================== --- docs/manual/megatest_manual.txt +++ docs/manual/megatest_manual.txt @@ -33,20 +33,17 @@ ---------------------------- Why Megatest? ------------- -The Megatest project was started for two reasons, the first was an -immediate and pressing need for a generalized tool to manage a suite -of regression tests and the second was the fact that I had written or -maintained several such tools at different companies over the years. I -thought a single open source tool, flexible enough to meet the needs -of any team doing continuous integration and or running a complex -suite of tests for release qualification would solve some problems for -me and for others. - - -- Matt Welland, original author of the Megatest tool suite. +Megatest was created to provide a generalized tool for managing suites +of regression tests and to provide a multi-host, distributed +alternative to "make". The EDA world is littered with proprietory, +company-specific tools for this purpose and by going open source and +keeping the tool flexible the hope is that Megatest could be useful to +any team at any company for continuous integration and almost any +other general automation tasks. Megatest Design Philosophy -------------------------- Megatest is a distributed system intended to provide the minimum needed @@ -98,24 +95,26 @@ which can launch jobs on local and remote Linux hosts. Currently megatest uses the network filesystem to call home to your master sqlite3 database. Megatest has been used with the Intel Netbatch and lsf (also known as openlava) batch systems and it should be straightforward to use it with other similar systems. + +// :leveloffset: 0 include::overview.txt[] include::plan.txt[] - + include::installation.txt[] include::getting_started.txt[] include::study_plan.txt[] -// :leveloffset: 0 - include::writing_tests.txt[] + +include::debugging.txt[] include::howto.txt[] include::reference.txt[] Index: docs/manual/plan.txt ================================================================== --- docs/manual/plan.txt +++ docs/manual/plan.txt @@ -1,8 +1,5 @@ -Road Map --------- - // This file is part of Megatest. // // Megatest is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or Index: docs/manual/reference.txt ================================================================== --- docs/manual/reference.txt +++ docs/manual/reference.txt @@ -205,10 +205,27 @@ [setup] # this will automatically kill the test if it runs for more than 1h 2m and 3s runtimelim 1h 2m 3s ----------------- +Post Run Hook ++++++++++++++ + +This runs script to-run.sh after all tests have been completed. It is +not necessary to use -run-wait as each test will check for other +running tests on completion and if there are none it will call the +post run hook. + +Note that the output from the script call will be placed in a log file +in the logs directory with a file name derived by replacing / with _ +in post-hook--.log. + +------------------- +[runs] +post-hook /path/to/script/to-run.sh +------------------- + Tests browser view ~~~~~~~~~~~~~~~~~~ The tests browser (see the Run Control tab on the dashboard) has two views for displaying the tests. @@ -720,10 +737,14 @@ (expect:required in "LogFileBody" > 0 "A file name that should never exist!" #/This is a awfully stupid file name that should never be found in the temp dir/) ----------------- To transfer the environment to the next step you can do the following: +# if your upstream file is csh you can force csh like this +# if your upstream is bash +loadenv source $REF/ourenviron.sh + .Propagate environment to next step ---------------------------- $MT_MEGATEST -env2file .ezsteps/${stepname} ---------------------------- ADDED docs/megatest-debug-tutorial.odp Index: docs/megatest-debug-tutorial.odp ================================================================== --- /dev/null +++ docs/megatest-debug-tutorial.odp cannot compute difference between binary files Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -104,11 +104,11 @@ (pid #f)) (let ((proc (lambda () (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) (if subrun (begin - (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") + (debug:print-info 0 *default-log-port* "Running "cmd" without MT_.* environment variables.") (common:without-vars proc "^MT_.*")) (proc))) (with-output-to-file "Makefile.ezsteps" (lambda () @@ -171,20 +171,21 @@ ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass (else 'fail))) - (overall-status (cond + (overall-status + (cond ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3) - (else 'fail))) - (next-status (cond - ((eq? overall-status 'pass) this-step-status) - ((eq? overall-status 'warn) - (if (eq? this-step-status 'fail) 'fail 'warn)) - ((eq? overall-status 'abort) 'abort) - (else 'fail))) + ((eq? (launch:einf-rollup-status exit-info) 3) 'check) + ((eq? (launch:einf-rollup-status exit-info) 4) 'waived) + ((eq? (launch:einf-rollup-status exit-info) 5) 'abort) + ((eq? (launch:einf-rollup-status exit-info) 6) 'skip) + ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) + (else 'fail))) + (next-status (common:worse-status-sym this-step-status overall-status)) + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? (cond ((null? tal) ;; more to run? "COMPLETED") (else "RUNNING")))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -53,11 +53,11 @@ ;; given an exit code and whether or not logpro was used calculate OK/BAD ;; return #t if we are ok, #f otherwise (define (steprun-good? logpro exitcode stepparms) (or (eq? exitcode 0) - (and logpro (eq? exitcode 2)) ;; shouldn't this be (member exitcode 2 ...) with the other ok codes? + (and logpro (member exitcode '( 2 4 6))) (let* ((params (alist-ref 'params stepparms)) ;; get the params section (keep-going (if params (alist-ref "keep-going" params equal?) #f))) (debug:print 0 *default-log-port* "keep-going=" keep-going) @@ -159,11 +159,11 @@ ;; 5. fix testpatt or calculate it from contour ;; 6. launch the run ;; 7. roll up the run result and or roll up the logpro processed result (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested (subrun:initialize-toprun-test testconfig test-run-dir) - (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) + (let* ((mt-cmd (subrun:launch-cmd test-run-dir (configf:lookup testconfig "subrun" "runwait")))) (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") (set! ezsteps #t) ;; set the needed flag (set! ezstepslst (append (or ezstepslst '()) (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) @@ -173,11 +173,15 @@ (let* ((all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; where 'params is the params list (add other stuff as needed) (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0)) (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") - (let loop ((ezstep (car ezstepslst)) + (let ((all-step-names (map car ezstepslst)) + (status-file (file-open "ezsteps.status" (+ open/append open/wronly open/creat))) + ) + (setenv "MT_STEP_NAMES" (string-intersperse all-step-names " ")) + (let loop ((ezstep (car ezstepslst)) (tal (cdr ezstepslst)) (prevstep #f)) (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") ;; check exit-info (vector-ref exit-info 1) (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) @@ -187,15 +191,22 @@ (setenv "MT_STEP_NAME" stepname) (pp (hash-table->alist all-steps-dat)) ;; if logpro-used read in the stepname.dat file (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) (launch:load-logpro-dat run-id test-id stepname)) + + (file-write status-file (conc stepname " " (launch:einf-exit-code exit-info) "\n")) + (if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms) (if (not (null? tal)) (loop (car tal) (cdr tal) stepname)) (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping"))) - (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))))) + (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))) + (file-close status-file) + ) + + )))))) (define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) (start-seconds (current-seconds)) (calc-minutes (lambda () @@ -476,21 +487,23 @@ (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) ) ;; prime it for running ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) (if (process:alive-on-host? test-host test-pid) (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - (exit))) + (exit 1))) ((member (db:test-get-state test-info) '("COMPLETED")) ;; we do NOT want to re-run COMPLETED jobs. Mark as NOT_STARTED to run! - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") - (exit)) + (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") + (debug:print 0 *default-log-port* "exiting with status 1") + (exit 1)) ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:general-call 'set-test-start-time #f test-id) (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)) (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") - (exit)))) + (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") + (debug:print 0 *default-log-port* "exiting with status 1") + (exit 1)))) ;; cleanup prior execution's steps (rmt:delete-steps-for-test! run-id test-id) (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) @@ -643,11 +656,12 @@ (print content) (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))))) (else (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) scripts)) - ;; + ;; + (let* ((m (make-mutex)) (kill-job? #f) (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status (job-thread #f) ;; (keep-going #t) @@ -657,29 +671,33 @@ (runit (lambda () (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) (monitorjob (lambda () (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) (th1 (make-thread monitorjob "monitor job")) - (th2 (make-thread runit "run job"))) + (th2 (make-thread runit "run job")) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t)) + (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code")) + (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED")) + (test-status "not set") + ) (set! job-thread th2) (thread-start! th1) (thread-start! th2) (thread-join! th2) - (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (debug:print-info 2 *default-log-port* "exit-info = " exit-info) (hash-table-set! misc-flags 'keep-going #f) (thread-join! th1) (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) ;; only state and status needed - use lazy routine (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; Am I completed? - (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) - (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status - ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test - ) - (new-status (cond + (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED")) + (new-status (cond ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) @@ -688,35 +706,53 @@ (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") - (else "FAIL")))) ;; (db:test-get-status testinfo))) - (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) + (else "FAIL"))) + ) ;; (db:test-get-status testinfo))) + (debug:print-info 0 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) - ;; Leave a .final-status file for each sub-test - (tests:save-final-status run-id test-id) + ;; Leave a .final-status file for each sub-test + (tests:save-final-status run-id test-id) (tests:test-set-status! run-id test-id new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! - )) + ) + ) + + ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no - ;; Leave a .final-status file for the top level test - (tests:save-final-status run-id test-id) - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) + ;; Leave a .final-status file for the top level test + (tests:save-final-status run-id test-id) + (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let* + (mutex-unlock! m) (launch:end-of-run-check run-id ) (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") + + + (set! test-status (db:test-get-status (rmt:get-testinfo-state-status run-id test-id))) + + ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1. + + (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list)) + (begin + (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) + (set! *globalexitstatus* 1) + ) + ) + (if (not (launch:einf-exit-status exit-info)) (exit 4)))) ))) ;; Spec for End of test @@ -768,30 +804,46 @@ (tal (cdr not-completed-tests))) (let* ((test-name (vector-ref running-test 2)) (item-path (vector-ref running-test 11))) (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") (if (not (null? tal)) - (loop (car tal) (cdr tal))))))))))) - -(define (launch:is-test-alive host pid) + (loop (car tal) (cdr tal))))))))))) + +;; replaced below with version that does not ssh if checking on localhost +#;(define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) (let* ((cmd (conc "ssh " host " pstree -A " pid)) (output (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) #t)) - + +(define (launch:is-test-alive host pid) + (let* ((same-host (equal? host (get-host-name))) + (cmd (conc + (if same-host "" (conc "ssh "host" ")) + "pstree -A "pid))) + (if (and host pid + (not (equal? host "n/a"))) + + (let* ((output (with-input-from-pipe cmd read-lines))) + (debug:print 2 *default-log-port* "Running " cmd " received " output) + (if (eq? (length output) 0) + #f + #t)) + #t))) ;; assuming bad query is about a live test is likely not the right thing to do? + (define (launch:kill-tests-if-dead run-id) (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) (let loop ((running-test (car running-tests)) (tal (cdr running-tests)) (kill-cnt 0)) (let* ((test-name (vector-ref running-test 2)) (item-path (vector-ref running-test 11)) - (test-id (vector-ref running-test 0)) + (test-id (vector-ref running-test 0)) (host (vector-ref running-test 6)) (pid (rmt:test-get-top-process-pid run-id test-id)) (event-time (vector-ref running-test 5)) (duration (vector-ref running-test 12)) (flag 0) @@ -1105,19 +1157,20 @@ (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) (hash-table-ref/default confdat "disks" #f))) (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) (string->number (or m "10000"))))) (if disks - (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb + (let ((res (common:get-disk-with-most-free-space disks minspace))) (if res (cdr res) - (begin ;; DEAD CODE PATH - REVISIT! -;; (if (common:low-noise-print 20 "No valid disks or no disk with enough space") -;; (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace)) - ;;(exit 1) - (if (null? disks) + ;; else if no valid disks... + (begin + (debug:print 0 *default-log-port* "WARNING: No valid disks or no disk with enough space found from " disks) + (if (null? disks) (cons 1 (conc *toppath* "/runs")) + + ;; else try to create the directories anyway. (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y))))))) (let loop ((head (car paths)) (tail (cdr paths))) (let ((result (handle-exceptions exn (begin (debug:print 0 *default-log-port* "failed to create dir " (cadr head) ", exn=" exn) @@ -1124,22 +1177,32 @@ #f) (create-directory (cadr head) #t)))) (if result result (if (null? tail) - (cons 1 (conc *toppath* "/runs")) - (loop (car tail) (cdr tail))))))))))) - ;; no disks definition - use mtrah/runs, fall back to currdir/runs + (begin + (debug:print 0 *default-log-port* "Using toppath/runs") + (conc *toppath* "/runs") + ) + (loop (car tail) (cdr tail)))))) + ) + ) ;; if null? disks + ) ;; if not res + ) + ) + ;; no disks definition - use toppath/runs, fall back to currdir/runs (let* ((toppath (or *toppath* (common:get-toppath *toppath*) (begin (debug:print-error 0 *default-log-port* "Creating runs dir in current directory, this is probably not what you wanted. Please check your setup.") (current-directory)))) (runsdir (conc toppath "/runs"))) (if (not (file-exists? runsdir))(create-directory runsdir)) runsdir) ))) ;; the code creates the necessary directories if it does not exist and returns the path. + + (define (launch:test-copy test-src-path test-path) (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd"))) (if cmd ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH @@ -1471,10 +1534,11 @@ ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) ;; (pp (hash-table->alist tconfig)) (set! diskpath (get-best-disk *configdat* tconfig)) + (debug:print 2 *default-log-port* "best disk path = " diskpath) (if diskpath (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) (debug:print-info 2 *default-log-port* "Using work area " work-area)) @@ -1513,10 +1577,12 @@ (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) + + (setenv "MT_CMDINFO" cmdparms) ;; setting this for use in nblauncher ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway (if (common:file-exists? work-area) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.6584) +(define megatest-version 1.6592) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -137,25 +137,42 @@ ;;====================================================================== ;; T R I G G E R S ;;====================================================================== -(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status) +(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status target runname) ;; Putting the commandline into ( )'s means no control over the shell. ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files ;; or equivalent. No need to do this. Just run it? - (let* ((fullcmd (conc "nbfake " + (let* ((new-trigger-format (configf:lookup *configdat* "setup" "new-trigger-format")) + (fullcmd + (if (and new-trigger-format (string=? new-trigger-format "yes")) + (conc "nbfake " + cmd " " + test-id " " + test-rundir " " + trigger " " + actual-state " " + actual-status " " + event-time " " + target " " + runname " " + test-name " " + item-path + ) + (conc "nbfake " cmd " " test-id " " test-rundir " " trigger " " test-name " " - item-path " " ;; has / prepended to deal with toplevel tests + item-path " " actual-state " " actual-status " " event-time - )) + ) + )) (prev-nbfake-log (get-environment-variable "NBFAKE_LOG"))) (setenv "NBFAKE_LOG" (conc (cond ((and (directory-exists? test-rundir) (file-write-access? test-rundir)) test-rundir) @@ -163,18 +180,15 @@ (file-write-access? *toppath*)) *toppath*) (else (conc "/tmp/" (current-user-name)))) "/" logname)) (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG")) - ;; (call-with-environment-variables - ;; `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname))) - ;; (lambda () (process-run fullcmd) (if prev-nbfake-log (setenv "NBFAKE_LOG" prev-nbfake-log) (unsetenv "NBFAKE_LOG")) - )) ;; )) + )) (define (mt:process-triggers dbstruct run-id test-id newstate newstatus) (if test-id (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) (if test-dat @@ -184,11 +198,13 @@ (duration (db:test-get-run_duration test-dat)) (comment (db:test-get-comment test-dat)) (event-time (db:test-get-event_time test-dat)) (tconfig #f) (state (if newstate newstate (db:test-get-state test-dat))) - (status (if newstatus newstatus (db:test-get-status test-dat)))) + (status (if newstatus newstatus (db:test-get-status test-dat))) + (target (getenv "MT_TARGET")) + (runname (getenv "MT_RUNNAME"))) ;; (mutex-lock! *triggers-mutex*) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus @@ -217,14 +233,14 @@ (for-each (lambda (trigger) (let* ((munged-trigger (string-translate trigger "/ " "--")) (logname (conc "last-trigger-" munged-trigger ".log"))) ;; first any triggers from the testconfig (let ((cmd (configf:lookup tconfig "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status target runname))) ;; next any triggers from megatest.config (let ((cmd (configf:lookup *configdat* "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status))))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status target runname))))) (list (conc state "/" status) (conc state "/") (conc "/" status))) (pop-directory)) Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -154,10 +154,11 @@ show [areas|contours... ] : show areas, contours or other section from megatest.config gendot : generate a graphviz dot file from pkts. Contour actions: process : runs import, rungen and dispatch + go : runs import, rungen and dispatch every five minutes forever Trigger propagation actions: tsend a=b,c=d... : send trigger info to all recpients in the [listeners] section tlisten -port N : listen for trigger info on port N @@ -773,11 +774,11 @@ (begin (print-call-chain) (print "FAILED TO RUN RUNNAME MAPPER " callname " FOR AREA " area) (print " message: " ((condition-property-accessor 'exn 'message) exn)) runname) - (print "(mapper " (string-intersperse (list runkey runname area area-path reason contour mode-patt) ", ") ")") + (print "(mapper " (string-intersperse (map conc (list runkey runname area area-path reason contour mode-patt)) ", ") ")") (mapper runkey runname area area-path reason contour mode-patt)) (case callname ((auto #f) runname) (else runtrans))))) (new-target target) ;; I believe we will want target manipulation here .. (map-targets xlatr-key runkey area contour)) @@ -1612,23 +1613,70 @@ ;; (hash-table-keys adjargs)) (let-values (((uuid pkt) (command-line->pkt *action* adjargs #f area-path: area-path new-ss: new-ss))) (print "run log @ " (conc (current-directory) "/" uuid "-" *action* ".log")) (write-pkt pktsdir uuid pkt)))) - ((dispatch import rungen process) + ((dispatch import rungen process go) (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) - (toppath (configf:lookup mtconf "scratchdat" "toppath"))) + (toppath (configf:lookup mtconf "scratchdat" "toppath")) + (period (configf:lookup-number mtconf "mtutil" "autorun-period" default: 300)) + (rest-time (configf:lookup-number mtconf "mtutil" "autorun-rest" default: 30))) + (print "Using period="period" and rest time="rest-time) (case (string->symbol *action*) ((process) (begin (common:load-pkts-to-db mtconf) (generate-run-pkts mtconf toppath) (common:load-pkts-to-db mtconf) (dispatch-commands mtconf toppath))) ((import) (common:load-pkts-to-db mtconf)) ;; import pkts ((rungen) (generate-run-pkts mtconf toppath)) - ((dispatch) (dispatch-commands mtconf toppath))))) + ((dispatch) (dispatch-commands mtconf toppath)) + ;; [mtutil] + ;; # approximate interval between run processing in mtutil (seconds) + ;; autorun-period 300 + ;; # minimal rest period between processing + ;; autorun-rest 30 + ((go) + ;; determine if I'm the boss + (if (file-exists? "mtutil-go.pid") + (begin + (print "ERROR: mtutil go is already running under host and pid " (with-input-from-file "mtutil-go.pid" read-line) + ". Please kill that process and remove the file \"mutil-go.pid\" and try again.") + (exit))) + (with-output-to-file "mtutil-go.pid" (lambda ()(print (get-host-name) " " (current-process-id)))) + (print "Starting long running import, rungen, and process loop") + (if (file-exists? "do-not-run-mtutil-go") + (begin + (print "NOTE: Removing flag file "(current-directory)"/do-not-run-mtutil-go") + (delete-file* "do-not-run-mtutil-go"))) + (let loop ((last-run (- (current-seconds) (+ period 10))) ;; fake out first time in + (this-run (current-seconds))) + (if (file-exists? "do-not-run-mtutil-go") + (begin + (print "File do-not-run-mtutil-go exists, exiting.") + (delete-file* "mtutil-go.pid") + (exit))) + (let ((delta (- this-run last-run))) + (if (>= delta period) + (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat))) + (print "Running import at " (current-seconds)) + (common:load-pkts-to-db mtconf) + (print "Running generate run pkts at " (current-seconds)) + (generate-run-pkts mtconf toppath) + (print "Running run dispatch at " (current-seconds)) + (common:load-pkts-to-db mtconf) + (dispatch-commands mtconf toppath) + (print "Done running import, generate, and dispatch done in " (- (current-seconds) this-run)) + (print "NOTE: touch " (current-directory) "/do-not-run-mtutil-go to kill this runner.") + (loop this-run (current-seconds))) + (let ((now (current-seconds))) + (print "Sleeping " rest-time " seconds, next run in aproximately " (- period (- now last-run)) " seconds") + (thread-sleep! rest-time) + (loop last-run (current-seconds)))))) + (delete-file* "mtutil-go.pid"))))) ;; misc ((show) (if (> (length remargs) 0) (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) @@ -1807,46 +1855,52 @@ ) ))) (loop (nn-recv rep)))) (print "ERROR: Port " portnum " already in use. Try another port"))))))) - - - - - ((tlisten) - (if (null? remargs) - (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"") - (let ((portnum (string->number (car remargs)))) - - (if (not portnum) - (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs)) - (begin - (if (not (is-port-in-use portnum)) - (let* ((rep (start-nn-server portnum)) - (mtconfdat (simple-setup (args:get-arg "-start-dir"))) - (mtconf (car mtconfdat)) - (contact (configf:lookup mtconf "listener" "owner")) - (script (configf:lookup mtconf "listener" "script"))) - (print "Listening on port " portnum " for messages.") - (set-signal-handler! signal/int (lambda (signum) - (set! *time-to-exit* #t) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!") - (let ((email-body (mtut:stml->string (s:body - (s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". ")))))) - (sendmail contact "Listner has been terminated." email-body use_html: #t)) - (exit))) - (set-signal-handler! signal/term (lambda (signum) - (set! *time-to-exit* #t) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!") - (let ((email-body (mtut:stml->string (s:body - (s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". ")))))) - (sendmail contact "Listner has been terminated." email-body use_html: #t)) - (exit))) - - ;(set-signal-handler! signal/term special-signal-handler) - + + ((tlisten) + (if (null? remargs) + (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"") + (let ((portnum (string->number (car remargs)))) + + (if (not portnum) + (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs)) + (begin + (if (not (is-port-in-use portnum)) + (let* ((rep (start-nn-server portnum)) + (mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat)) + (contact (configf:lookup mtconf "listener" "owner")) + (script (configf:lookup mtconf "listener" "script"))) + (print "Listening on port " portnum " for messages.") + (set-signal-handler! signal/int + (lambda (signum) + (set! *time-to-exit* #t) + (debug:print-error 0 *default-log-port* "Received signal " signum + " sending email befor exiting !!") + (let ((email-body (mtut:stml->string + (s:body + (s:p (conc "Received signal " signum + ". Lister has been terminated on host " + (get-environment-variable "HOST") ". ")))))) + (sendmail contact "Listner has been terminated." email-body use_html: #t)) + (exit))) + (set-signal-handler! signal/term (lambda (signum) + (set! *time-to-exit* #t) + (debug:print-error 0 *default-log-port* "Received signal " + signum " sending email befor exiting !!") + (let ((email-body (mtut:stml->string + (s:body + (s:p (conc "Received signal " signum + ". Lister has been terminated on host " + (get-environment-variable "HOST") ". ")))))) + (sendmail contact "Listner has been terminated." email-body use_html: #t)) + (exit))) + + ;; (set-signal-handler! signal/term special-signal-handler) + (let loop ((instr (nn-recv rep))) (nn-send rep "ok") (let ((ctime (date->string (current-date)))) (if (equal? instr "time-to-die") (begin Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -501,10 +501,11 @@ ;; ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; +;; run-count is passed from megatest.scm as configf:lookup *configdat* "setup" "reruns", or defaults to 1. (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields *configdat*)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) ;; (deferred '()) ;; delay running these since they have a waiton clause @@ -624,11 +625,11 @@ ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) - (debug:print-info 0 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) + (debug:print-info 2 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified @@ -685,11 +686,11 @@ (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names) (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. (setenv "MT_TEST_NAME" hed) ;; - (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry)) + (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry (tests:get-global-waitons))) ;; NOTE: Have the config - can extract [waitons] section ((hed-mode) (let ((m (configf:lookup config "requirements" "mode"))) @@ -738,11 +739,11 @@ (waiton-itemized (and waiton-tconfig (or (hash-table-ref/default waiton-tconfig "items" #f) (hash-table-ref/default waiton-tconfig "itemstable" #f)))) (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) - (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") + (debug:print-info 2 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt ;; is this satisfied by merely appending "/" to the waiton name added to the list? ;; ;; This approach causes all of the items in an upstream test to be run @@ -760,11 +761,11 @@ (set! test-patts new-test-patts)) (begin (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it") (set! tal (append (cons waiton tal)(list hed))))) (begin - (debug:print-info 0 *default-log-port* "Adding non-itemized test " waiton " to required-tests") + (debug:print-info 2 *default-log-port* "Adding non-itemized test " waiton " to required-tests") (set! required-tests (cons waiton required-tests)) (set! test-patts new-test-patts))) (begin (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it") (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) @@ -897,10 +898,11 @@ ;; (tal (cdr sorted-test-names)) ;; (reg '()) ;; registered, put these at the head of tal ;; (reruns '())) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) (let* ((loop-list (list hed tal reg reruns)) + (junk (debug:print-info 4 *default-log-port* "expand-items calling rmt:get-prereqs-not-met")) (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) (if (list? res) res (begin (debug:print 0 *default-log-port* @@ -1090,11 +1092,11 @@ ((null? runnables) (debug:print-info 4 *default-log-port* "cond branch - " "ei-7") #f) ;; if we get here and non-completed is null then it is all over. (else (debug:print-info 4 *default-log-port* "cond branch - " "ei-8") - (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") + (debug:print 2 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") (list (car newtal)(cdr newtal) reg reruns))))) (define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) (if (null? inlst) '() @@ -1667,11 +1669,10 @@ (loop (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) (runs:queue-next-reg tal reg reglen regfull) reruns)))) ;; (loop (car tal)(cdr tal) reg reruns)))) - (runs:incremental-print-results run-id) (debug:print 4 *default-log-port* "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed "\n tal: " (runs:pretty-long-list tal) @@ -1851,16 +1852,18 @@ (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) ) ) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) - + + ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) + ((not (null? reruns)) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-6") (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) @@ -1869,10 +1872,11 @@ (set! num-retries (+ num-retries 1)) ;; (thread-sleep! (+ 1 *global-delta*)) (if (not (null? newlst)) ;; since reruns have been tacked on to newlst create new reruns from junked (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) + ((not (null? tal)) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-7") (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) ((not (null? reg)) ;; could we get here with leftovers? (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") @@ -1972,10 +1976,11 @@ ;; All these vars might be referenced by the testconfig file reader ;; ;; NEED to reprocess testconfig here, ensuring that item variables are available. ;; This is for Tal's issue with item-specific env vars not being set for use in skip. ;; HSD https://hsdes.intel.com/appstore/icf/index.html#/article?articleId=1408763273 + ;; Also later HSD https://hsdes.intel.com/appstore/article/#/14012138487 ;; (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) (item-path "") @@ -1983,16 +1988,17 @@ (full-test-name #f)) ;; setting itemdat to a list if it is #f (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) + (set-item-env-vars itemdat) (set! full-test-name (db:test-make-full-name test-name item-path)) (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process - (let* ((test-conf ;; re-instate the tests:get-testconfig once the kinks are worked out. FIXME!!! - ;; (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t)) - (tests:testqueue-get-testconfig test-record )) + (let* ((test-conf ;; re-instated the tests:get-testconfig to fix HSD https://hsdes.intel.com/appstore/article/#/14012138487, need to be able to skip using [items], [itemstable] variables. + ;; (tests:testqueue-get-testconfig test-record )) ;; vector-ref test-record 3 + (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t)) (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... (force (hash-table-ref/default flags "-force" #f)) (rerun (hash-table-ref/default flags "-rerun" #f)) (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) @@ -2108,11 +2114,11 @@ (else (set! runflag #f))) (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) (if (not runflag) (if (not parent-test) (if (runs:lownoise (conc "not starting test" full-test-name) 60) - (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) + (debug:print 3 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) "\" or -force to override"))) ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are ;; already met. ;; This would be a great place to do the process-fork @@ -2393,11 +2399,11 @@ (tasks:kill-runner target run-name testpatt) ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) + (debug:print 2 *default-log-port* "Modifying state and status for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) @@ -2534,11 +2540,11 @@ ) ; end let ); end cond has-subrun (else ;; BB - TODO - consider backgrounding to threads to delete tests (work below) - (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) + (debug:print-info 2 *default-log-port* "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? @@ -2729,11 +2735,11 @@ ) (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) - (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) + (debug:print-info 2 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 *default-log-port* "Recursively removing " realpath) @@ -3057,17 +3063,17 @@ (files (if (common:file-exists? runtop) (append (glob (conc runtop "/.megatest*")) (glob (conc runtop "/.runconfig*"))) '()))) (if (null? files) - (debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.") + (debug:print-info 2 *default-log-port* "No cached megatest or runconfigs files found. None removed.") (begin - (debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n ")) + (debug:print-info 2 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n ")) (for-each (lambda (f) (handle-exceptions exn (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f ", exn=" exn) (delete-file f))) files)))) (debug:print-error 0 *default-log-port* "-clean-cache requires -runname.")) (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg"))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -111,12 +111,12 @@ ((fs) result) (else (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) result))) -;; Given a run id start a server process ### NOTE ### > file 2>&1 -;; if the run-id is zero and the target-host is set +;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area (let* ((curr-host (get-host-name)) @@ -154,21 +154,22 @@ (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) - (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time - ;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever - #;(common:wait-for-homehost-load load-limit (conc " delaying server start due to load on homehost. limit is " load-limit)) + (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time + (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) (thread-join! log-rotate) (pop-directory))) -;; given a path to a server log return: host port startseconds -;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let +;; given a path to a server log return: host port startseconds server-id +;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let +;; example of what it's looking for in the log file: +;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 (define (server:logf-get-start-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0)) @@ -201,18 +202,18 @@ (string->number (caddr dat)) (cadr (cddr dat)))))) (begin (if dbprep-found (begin - (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds)) - (thread-sleep! 25) + (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) + (thread-sleep! 0.5) ;; was 25 sec but that blocked things from starting? ) - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))) ) (list #f #f #f #f))))))))) -;; get a list of servers with all relevant data +;; get a list of servers from the log files, with all relevant data ;; ( mod-time host port start-time pid ) ;; (define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) (day-seconds (* 24 60 60))) @@ -228,17 +229,19 @@ (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) (directory-exists? (conc areapath "/logs"))) '())) - ;; Get the list of server logs that do not contain "exiting". Ignore logs for servers that have exited. - (let* ((server-logs-cmd (conc "grep -iL exiting " areapath "/logs/server-*-*.log")) - (server-logs (string-split (string-chomp (call-with-input-pipe server-logs-cmd read-all)))) + ;; Get the list of server logs. First remove logs for servers that have exited. + (let* ( + ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. + ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) + (server-logs (glob (conc areapath "/logs/server-*-*.log"))) (num-serv-logs (length server-logs))) (if (or (null? server-logs) (= num-serv-logs 0)) (let () - (debug:print 1 *default-log-port* "There are no servers running") + (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) '() ) (let loop ((hed (string-chomp (car server-logs))) (tal (cdr server-logs)) (res '())) @@ -365,66 +368,70 @@ (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) -;; wait for server=start-last to be three seconds old + +;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough. +;; if it is old enough, overwrite it and wait 0.25 seconds. +;; if it then has the wrong server key, wait + 1 and call this function recursively. ;; (define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last")) ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) - (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) + (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) (server-key (conc (get-host-name) "-" (current-process-id)))) (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (delta (- (current-seconds) fmodtime)) - (all-go (> delta reftime))) - (if (and all-go + (old-enough (> delta idletime)) + (new-server-key "") + ) + + ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t. + ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process. + (if (and old-enough (begin - (debug:print-info 0 *default-log-port* "Writing " start-flag) - (with-output-to-file start-flag - (lambda () - (print server-key))) + (debug:print-info 2 *default-log-port* "Writing " start-flag) + (with-output-to-file start-flag (lambda () (print server-key))) (thread-sleep! 0.25) - (let ((res (with-input-from-file start-flag - (lambda () - (read-line))))) - (equal? server-key res)))) - #t ;; (system (conc "touch " start-flag)) ;; lazy but safe + (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) + (equal? server-key new-server-key)) + ) + #t + + ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. (begin (debug:print-info 0 *default-log-port* "Gating server start, last start: " - fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) - (thread-sleep! reftime) + (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) + + (thread-sleep! ( + 1 idletime)) (server:wait-for-server-start-last-flag areapath))))))) -;; kind start up of servers, wait 40 seconds before allowing another server for a given -;; run-id to be launched + + +;; kind start up of server, wait before allowing another server for a given +;; area to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last - ;; and wait for it to be at least 3 seconds old + ;; and wait for it to be at least seconds old (server:wait-for-server-start-last-flag areapath) (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? - (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun - (call-num (car last-run-dat)) - (when-run (cadr last-run-dat)) - (run-delay (+ (case call-num - ((0) 0) - ((1) 20) - ((2) 300) - (else 600)) - (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously + (let* ( (lock-file (conc areapath "/logs/server-start.lock"))) - (if (> (- (current-seconds) when-run) run-delay) - (let* ((start-flag (conc areapath "/logs/server-start-last"))) - (common:simple-file-lock-and-wait lock-file expire-time: 15) - (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) - (system (conc "touch " start-flag)) ;; lazy but safe - (server:run areapath) - (thread-sleep! 2) ;; don't release the lock for at least a few seconds - (common:simple-file-release-lock lock-file))) - (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) + (let* ((start-flag (conc areapath "/logs/server-start-last"))) + (common:simple-file-lock-and-wait lock-file expire-time: 25) + (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) + (system (conc "touch " start-flag)) ;; lazy but safe + (server:run areapath) + (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". + (common:simple-file-release-lock lock-file))) + + (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.") + ) +) ;; this one seems to be the general entry point ;; (define (server:start-and-wait areapath #!key (timeout 60)) (let ((give-up-time (+ (current-seconds) timeout))) @@ -449,17 +456,17 @@ (or ns numservers))) ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; (define (server:check-if-running areapath) ;; #!key (numservers "2")) - (let* ((ns (server:get-num-servers)) + (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed (servers (server:get-best (server:get-list areapath)))) (if (or (and servers (null? servers)) (not servers) (and (list? servers) - (< (length servers) (random ns)))) ;; somewhere between 0 and numservers + (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers #f (let loop ((hed (car servers)) (tal (cdr servers))) (let ((res (server:check-server hed))) (if res Index: subrun.scm ================================================================== --- subrun.scm +++ subrun.scm @@ -128,17 +128,17 @@ (kill-result (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) kill-result) #t)) -(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work +(define (subrun:launch-cmd test-run-dir run-mode #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work (if (subrun:subrun-removed? test-run-dir) (subrun:unset-subrun-removed test-run-dir)) (let* ((log-prefix "run") (switches (subrun:selector+log-switches test-run-dir log-prefix)) - (run-wait #t) + (run-wait (equal? run-mode "yes")) (cmd (conc "megatest " sub-cmd " " switches" " (if run-wait "-run-wait " "")))) cmd)) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -53,10 +53,11 @@ ;; gets paths from configs and finds valid tests ;; returns hash of testname --> fullpath ;; (define (tests:get-all) (let* ((test-search-path (tests:get-tests-search-path *configdat*))) + (debug:print 8 *default-log-port* "test-search-path: " test-search-path) (tests:get-valid-tests (make-hash-table) test-search-path))) (define (tests:get-tests-search-path cfgdat) (let ((paths (let ((section (if cfgdat (configf:get-section cfgdat "tests-paths") @@ -135,10 +136,16 @@ ((string? res) res) ;;; FIX THE ROOT CAUSE HERE .... ((null? res) #f) ((string? (cdr res)) (cdr res)) ;; it is a pair ((string? (cadr res))(cadr res)) ;; it is a list (else cadr res)))))) + +(define (tests:get-global-waitons rconfig) + (let* ((global-waitons (runconfigs-get config "!GLOBAL_WAITONS"))) + (if (string? global-waitons) + (string-split global-waitons) + '()))) ;; return items given config ;; (define (tests:get-items tconfig) (let ((items (hash-table-ref/default tconfig "items" #f)) ;; items 4 @@ -167,11 +174,11 @@ (else #f)))) ;; not iterated ;; returns waitons waitors tconfigdat ;; -(define (tests:get-waitons test-name all-tests-registry) +(define (tests:get-waitons test-name all-tests-registry global-waitons) (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t) (let ((instr (if config (configf:lookup config "requirements" "waiton") (begin ;; No config means this is a non-existant test (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") @@ -178,11 +185,11 @@ (exit 1)))) (instr2 (if config (configf:lookup config "requirements" "waitor") ""))) (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2) - (let ((newwaitons + (let ((newwaitons-tmp (string-split (cond ((procedure? instr) ;; here (let ((res (instr))) (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name) res)) @@ -197,11 +204,14 @@ (debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name) res)) ((string? instr2) instr2) (else ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) - ""))))) + "")))) + (newwaitons (if global-waitons + (append newwaitons-tmp global-waitons) + newwaitons-tmp))) (values ;; the waitons (filter (lambda (x) (if (hash-table-ref/default all-tests-registry x #f) #t @@ -909,11 +919,11 @@ ; (set! page (+ 1 page)) (if (> total-runs (* (+ 1 page) pg-size)) (loop (+ 1 page))))) (common:simple-file-release-lock lockfile)) (begin - (debug-print 0 *default-log-port* "Failed to get lock on file outf, lockfile: " lockfile) #f)))) + (debug:print 0 *default-log-port* "Failed to get lock on file outf, lockfile: " lockfile) #f)))) (define (tests:readlines filename) (call-with-input-file filename (lambda (p) @@ -1607,11 +1617,11 @@ (and wait-a-minute (> tries-left 0)) (thread-sleep! 10) (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires (loopa (sub1 tries-left))) (else - (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires + (debug:print 2 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires #f)))) (tcfg (if testexists (read-config test-configf #f system-allowed environ-patt: (if system-allowed "pre-launch-env-vars" @@ -1842,30 +1852,43 @@ ;; refactoring this block into tests:get-full-data from line 263 of runs.scm ;;====================================================================== ;; hed is the test name ;; test-records is a hash of test-name => test record (define (tests:get-full-data test-names test-records required-tests all-tests-registry) - (if (not (null? test-names)) + (let ((missing-waitons (make-hash-table))) + (if (not (null? test-names)) (let loop ((hed (car test-names)) (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop") ;; don't know item-path at this time, let the testconfig get the top level testconfig (let* ((config (tests:get-testconfig hed #f all-tests-registry 'return-procs)) (waitons (let ((instr (if config (configf:lookup config "requirements" "waiton") - (begin ;; No config means this is a non-existant test - (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.") - "")))) + (begin ;; No config means this is a non-existent test + (let ((waiters '())) + ;; find the waiter(s) for this waiton. + (for-each + (lambda(waiter) + ;; (print "test-record = " (hash-table-ref test-records waiter)) + ;; (print "waitons = " (vector-ref (hash-table-ref test-records waiter) 2)) + (if (member hed (vector-ref (hash-table-ref test-records waiter) 2)) + (set! waiters (cons waiter waiters)) + ) + ) + (hash-table-keys test-records)) + (hash-table-set! missing-waitons hed waiters) + ) + "")))) (debug:print-info 8 *default-log-port* "waitons string is " instr) (string-split (cond ((procedure? instr) (let ((res (instr))) (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed) res)) ((string? instr) instr) (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed) + ;; NOTE: This is actually the case of *no* waitons! ;; "")))))) (if (not config) ;; this is a non-existant test called in a waiton. (if (null? tal) test-records (loop (car tal)(cdr tal))) @@ -1910,21 +1933,28 @@ (items:get-items-from-config config)) (else #f))) ;; not iterated #f ;; itemsdat 5 #f ;; spare - used for item-path ))) - (for-each + (for-each (lambda (waiton) - (if (and waiton (not (member waiton test-names))) + (if (and waiton (not (string= "#f" waiton)) (not (member waiton test-names))) (begin (set! required-tests (cons waiton required-tests)) (set! test-names (cons waiton test-names))))) ;; was an append, now a cons waitons) (let ((remtests (delete-duplicates (append waitons tal)))) (if (not (null? remtests)) (loop (car remtests)(cdr remtests)) - test-records)))))))) + test-records))))))) + (for-each + (lambda (missing-waiton) + (debug:print-error 0 *default-log-port* "non-existent test \"" missing-waiton "\" is a waiton for tests " (hash-table-ref missing-waitons missing-waiton)) + ) + (hash-table-keys missing-waitons) + ) +)) ;;====================================================================== ;; test steps ;;====================================================================== Index: tests/mintest/megatest.config ================================================================== --- tests/mintest/megatest.config +++ tests/mintest/megatest.config @@ -19,16 +19,15 @@ X TEXT [setup] max_concurrent_jobs 50 linktree #{getenv MT_RUN_AREA_HOME}/linktree -transport http [server] port 8090 [jobtools] useshell yes -launcher nbfind +launcher nbfake [disks] disk0 #{getenv PWD}/runs Index: tests/mintest/tests/a/testconfig ================================================================== --- tests/mintest/tests/a/testconfig +++ tests/mintest/tests/a/testconfig @@ -16,8 +16,15 @@ # along with Megatest. If not, see . # Add steps here. Format is "stepname script" [ezsteps] step1 echo SUCCESS +# step1 ((xterm&);echo SUCCESS) +# step2 xterm_and_success + +[scripts] +xterm_and_success #!/bin/bash + xterm -T step2 & + echo SUCCESS [requirements] waiton b Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -44,10 +44,19 @@ else export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$libdir fi export MT_SQLITE3_EXE=$sqlite3_exe + +http_vars="http_proxy https_proxy HTTP_PROXY HTTPS_PROXY" +for i in \$http_vars +do +j=\${!i} +if [ "\$j" != "" ]; then + unset \$i +fi +done __EOF ) > $cfgfile echo else echo "INFO: LD_LIBRARY_PATH not set" >&2 @@ -93,11 +102,9 @@ # echo "#!/bin/bash" > $target # echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target echo "lsbr=\$(lsb_release -sr)" >> $target -if [ "$LD_LIBRARY_PATH" != "" ];then - echo "source $prefix/bin/.\$lsbr/cfg.sh" >> $target -fi +echo "source $prefix/bin/.\$lsbr/cfg.sh" >> $target # echo "if [[ -e \$lsbr ]];then source \$lsbr;fi" >> $target echo "exec $prefix/bin/.\$lsbr/$cmd \"\$@\"" >> $target
Table 6. API Keys Related CallsTable 7. API Keys Related Calls