Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -314,19 +314,40 @@ # install dashboard as dboard so wrapper script can be called dashboard $(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard chmod a+x $(PREFIX)/bin/dashboard $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so : lib/libpangox-1.0.so + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libpangox-1.0.so $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so; \ + fi + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 : lib/libpangox-1.0.so.0 + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libpangox-1.0.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0; \ + fi + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 : lib/libxcb-xlib.so.0 + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \ + fi install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \ - $(PREFIX)/share/js/jquery-3.1.0.slim.min.js + $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 # $(PREFIX)/bin/.$(ARCHSTR)/ndboard # $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -383,18 +383,18 @@ (debug:print 4 *default-log-port* "server-id:" *server-id*) (let* ((cmd ($ 'cmd)) (paramsj ($ 'params)) (key ($ 'key)) (params (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?) - (debug:print 0 *default-log-port* "cmd:" cmd " with params " params "key " key) + (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key) (if (equal? key *server-id*) (begin (set! *api-process-request-count* (+ *api-process-request-count* 1)) (let* ((resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result ) (success (vector-ref resdat 0)) (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?) - (debug:print 0 *default-log-port* "res:" res) + (debug:print 4 *default-log-port* "res:" res) (if (not success) (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params)) (if (> *api-process-request-count* *max-api-process-requests*) (set! *max-api-process-requests* *api-process-request-count*)) (set! *api-process-request-count* (- *api-process-request-count* 1)) Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -120,11 +120,11 @@ res) (begin (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", archive-path=" archive-path) #f))) (begin - (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", block-id=" block-id) + (debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ) #f)))))) ;; no best disk found ;; archive - run bup ;; ;; 1. create the bup dir if not exists @@ -224,11 +224,11 @@ " as it is a toplevel test with children")) ((not (common:file-exists? test-path)) (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path " as path " test-path " does not exist")) (else - (debug:print 0 *default-log-port* + (debug:print 2 *default-log-port* "From test-dat=" test-dat " derived the following:\n" "test-partial-path = " test-partial-path "\n" "test-path = " test-path "\n" "test-physical-path = " test-physical-path "\n" "partial-path-index = " partial-path-index "\n" @@ -268,27 +268,39 @@ ((bup) ;; Archive using bup (let* ((bup-init-params (list "-d" archive-dir "init")) (bup-index-params (append (list "-d" archive-dir "index") test-paths)) (bup-save-params (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) (conc "-" compress) ;; or (conc "--compress=" compress) - "-n" (conc (common:get-testsuite-name) "-" run-id) - (conc "--strip-path=" test-base) ;; if we push to the directory do we need this? + "-n" (conc (common:get-testsuite-name) "-"(string-substitute "/" "-" target " ")) + (conc "--strip-path=" (conc test-base target "/" )) ;; if we push to the directory do we need this? ) test-paths))) (if (not (common:file-exists? (conc archive-dir "/HEAD"))) (begin ;; replace this with jobrunner stuff enventually - (debug:print-info 0 *default-log-port* "Init bup in " archive-dir) + (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix) + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") + (exit 1)))) ;; (mutex-unlock! bup-mutex) )) - (debug:print-info 0 *default-log-port* "Indexing data to be archived") + (debug:print-info 2 *default-log-port* "Indexing data to be archived") ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix) - (debug:print-info 0 *default-log-port* "Archiving data with bup") - (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") + (exit 1)))) + (debug:print-info 2 *default-log-port* "Archiving data with bup") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an archiving data with bup. Archive failed.") + (exit 1)))))) ((7z tar) (for-each (lambda (test-dat) (let* ((test-id (db:test-get-id test-dat)) (test-name (db:test-get-testname test-dat)) @@ -336,11 +348,11 @@ (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) (home-host (common:get-homehost)) (archive-time (seconds->std-time-str (current-seconds))) - (archive-staging-db (conc *toppath* "/logs/archive_" archive-time)) + (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db")) (dbfile (conc archive-staging-db "/megatest.db"))) (create-directory archive-staging-db #t) (let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix))) (if (eq? exit-code 0) @@ -354,36 +366,49 @@ (conc "--strip-path=" archive-staging-db ) ;; if we push to the directory do we need this? dbfile))) (if (not (common:file-exists? (conc archive-dir "/HEAD"))) (begin ;; replace this with jobrunner stuff enventually - (debug:print-info 0 *default-log-port* "Init bup in " archive-dir) - (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) - (debug:print-info 0 *default-log-port* "Indexing data to be archived") - (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix) - (debug:print-info 0 *default-log-port* "Archiving data with bup") - (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (debug:print-info 2 *default-log-port* "Init bup in " archive-dir) + (let-values (((pid-val exit-status exit-code)(run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error initializing bup. Archive failed.") + (exit 1)))))) + (debug:print-info 2 *default-log-port* "Indexing data to be archived") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error Indexing bup. Archive failed.") + (exit 1)))) + (debug:print-info 2 *default-log-port* "Archiving data with bup") + (let-values (((pid-val exit-status exit-code) (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) + (if (not (eq? exit-code 0)) + (begin + (debug:print-error 0 *default-log-port* "There was an error archiving data with bup. Archive failed.") + (exit 1)) + (debug:print-info 0 *default-log-port* "To restore megatest.db run megatest -archive replicacte-db -source archive-dir -time-stamp . Current timestamp: " (seconds->std-time-str (current-seconds))))))) (else (debug:print-info 0 *default-log-port* "No support for databse archiving with " archiver))) (debug:print-error 0 *default-log-port* "There was an error rsyncing tmp database"))))) (define (archive:restore-db archive-path ts) (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) (archive-internal-path (conc (common:get-testsuite-name) "-megatest-db/" ts "/megatest.db" )) (bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path))) - (debug:print-info 0 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) - (run-n-wait bup-exe params: bup-restore-params print-cmd: #f)) + (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) + (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:")) + (sleep 2) (db:multi-db-sync (db:setup #f) 'killservers ;'dejunk ;'adj-testids 'old2new ) - (debug:print-info 1 *default-log-port* "dropping trigerrs to update linktree") + (debug:print-info 1 *default-log-port* "dropping triggers to update linktree") (rmt:drop-all-triggers) - (let* ((linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) (src-archive-linktree (rmt:get-var "src-archive-linktree"))) (if (not (equal? src-archive-linktree linktree)) (rmt:update-tesdata-on-repilcate-db src-archive-linktree linktree)) (debug:print-info 1 *default-log-port* "creating triggers after updating linktree") @@ -412,13 +437,13 @@ (time->string (seconds->local-time sec) "%Y-%m-%d-%H%M%S")) -(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name run-id test-partial-path test-last-update) +(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name target test-partial-path test-last-update) (debug:print-info 0 *default-log-port* "Test last update time:" (seconds->std-time-str test-last-update)) - (let* ((internal-path (conc testsuite-name "-" run-id)) + (let* ((internal-path (conc testsuite-name "-" target)) (archive-update-delay (string->number (or (configf:lookup *configdat* "archive" "test-update-delay") "900" ))) (ts-list (archive:ls->list bup-exe archive-dir internal-path)) (ds-flag (vector-ref (seconds->local-time) 8))) (let loop ((hed (car ts-list)) (tail (cdr ts-list))) @@ -457,11 +482,11 @@ (keyvals (rmt:get-key-val-pairs run-id)) (target (string-intersperse (map cadr keyvals) "/")) (toplevel/children (and (db:test-get-is-toplevel test-dat) (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) + (test-partial-path (conc run-name "/" (db:test-make-full-name test-name item-path))) ;; note the trailing slash to get the dir inspite of it being a link (test-path (conc linktree "/" test-partial-path)) ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory (mutex-lock! rp-mutex) (prev-test-physical-path (if (common:file-exists? test-path) @@ -474,12 +499,12 @@ (test-last-update (db:test-get-last_update test-dat)) (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) (archive-path (if (vector? archive-block-info) (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info #f)) ;; no archive found? - (archive-timestamp-dir (if archive-path (archive:get-timestamp-dir bup-exe archive-path (common:get-testsuite-name) run-id test-partial-path test-last-update) #f)) - (archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/" archive-timestamp-dir "/" test-partial-path)) + (archive-timestamp-dir (if archive-path (archive:get-timestamp-dir bup-exe archive-path (common:get-testsuite-name) (string-substitute "/" "-" target " ") test-partial-path test-last-update) #f)) + (archive-internal-path (conc (common:get-testsuite-name) "-" (string-substitute "/" "-" target " ") "/" archive-timestamp-dir "/" test-partial-path)) (include-paths (args:get-arg "-include")) (exclude-pattern (args:get-arg "-exclude-rx")) (exclude-file (args:get-arg "-exclude-rx-from"))) (if (not archive-timestamp-dir) (debug:print-error 0 *default-log-port* "Archive not found for testsuite" (common:get-testsuite-name) " run/test/itempath" test-partial-path) ADDED bin/.11/lib/libpangox-1.0.so Index: bin/.11/lib/libpangox-1.0.so ================================================================== --- /dev/null +++ bin/.11/lib/libpangox-1.0.so cannot compute difference between binary files ADDED bin/.11/lib/libpangox-1.0.so.0 Index: bin/.11/lib/libpangox-1.0.so.0 ================================================================== --- /dev/null +++ bin/.11/lib/libpangox-1.0.so.0 cannot compute difference between binary files ADDED bin/.11/lib/libxcb-xlib.so.0 Index: bin/.11/lib/libxcb-xlib.so.0 ================================================================== --- /dev/null +++ bin/.11/lib/libxcb-xlib.so.0 cannot compute difference between binary files Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -2187,30 +2187,32 @@ (effective-load (common:get-intercept first next)) (recommended-delay (common:get-delay effective-load numcpus)) (effective-host (or remote-host "localhost")) (normalized-effective-load (/ effective-load numcpus)) (will-wait (> normalized-effective-load maxnormload))) - (if (> recommended-delay 0) + (if (> recommended-delay 1) (let* ((actual-delay (min recommended-delay 30))) (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load")) (debug:print-info 0 *default-log-port* "Load control, delaying " actual-delay " seconds to maintain safe load. current normalized effective load is " - normalized-effective-load".")) + normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load)) (thread-sleep! actual-delay))) (cond ;; bad data, try again to get the data ((not will-wait) - (if (common:low-noise-print 30 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) - (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) + ;; (if (common:low-noise-print 30 (conc (round normalized-effective-load) "-load-acceptable-" effective-host)) + ;; (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing."))) + ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable (> num-tries 0)) (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.") (thread-sleep! 10) (common:wait-for-cpuload maxnormload numcpus-in count: count remote-host: remote-host num-tries: (- num-tries 1))) + ;; need to wait for load to drop ((and will-wait ;; (> first adjmaxload) (> count 0)) (debug:print-info 0 *default-log-port* "Delaying 15" ;; adjwait Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1081,13 +1081,18 @@ (case option ;; kill servers ((killservers) (for-each (lambda (server) - (match-let (((mod-time host port start-time pid) server)) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) server)) (if (and host pid) - (tasks:kill-server host pid)))) + (tasks:kill-server host pid))))) servers) ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock (delete-file* (common:get-sync-lock-filepath)) ) @@ -1534,11 +1539,11 @@ state TEXT DEFAULT 'new', status TEXT DEFAULT 'n/a', archive_type TEXT DEFAULT 'bup', du INTEGER, archive_path TEXT);"))) - ;; (print "creating trigges from init") + (print "creating triggers from init") (db:create-triggers db) db)) ;; ) ;;====================================================================== ;; A R C H I V E S ADDED lib/libpangox-1.0.so Index: lib/libpangox-1.0.so ================================================================== --- /dev/null +++ lib/libpangox-1.0.so cannot compute difference between binary files ADDED lib/libpangox-1.0.so.0 Index: lib/libpangox-1.0.so.0 ================================================================== --- /dev/null +++ lib/libpangox-1.0.so.0 cannot compute difference between binary files ADDED lib/libxcb-xlib.so.0 Index: lib/libxcb-xlib.so.0 ================================================================== --- /dev/null +++ lib/libxcb-xlib.so.0 cannot compute difference between binary files Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.6581) +(define megatest-version 1.6583) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -519,15 +519,25 @@ "-show-runconfig" ;;"-list-db-targets" "-show-runconfig" "-show-config" "-show-cmdinfo" - "-cleanup-db")) + "-cleanup-db" + )) + (no-watchdog-argvals (list '("-archive" . "replicate-db"))) + (start-watchdog-specail-arg-val (let loop ((hed (car no-watchdog-argvals)) + (tail (cdr no-watchdog-argvals))) + ;; (print "hed" hed " arg " (args:get-arg (car hed)) " val:" (cdr hed) " eql" (equal? (args:get-arg (car hed)) (cdr hed))) + (if (equal? (args:get-arg (car hed)) (cdr hed)) + #f + (if (null? tail) + #t + (loop (car tail) (cdr tail)))))) (no-watchdog-args-vals (filter (lambda (x) x) (map args:get-arg no-watchdog-args))) - (start-watchdog (null? no-watchdog-args-vals))) - ;;(BB> "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals) + (start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-specail-arg-val))) + ;(print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog) (if start-watchdog (thread-start! *watchdog*))) ;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions @@ -1969,15 +1979,15 @@ ;;====================================================================== ;; Archive tests ;;====================================================================== ;; Archive tests matching target, runname, and testpatt -(if (equal? (args:get-arg "-archive") "replicacte-db") +(if (equal? (args:get-arg "-archive") "replicate-db") (begin ;; check if source - ;; check if megatest.db exist - (launch:setup) + ;; check if megatest.db exist + (launch:setup) (if (not (args:get-arg "-source")) (begin (debug:print-info 1 *default-log-port* "Missing required argument -source ") (exit 1))) (if (common:file-exists? (conc *toppath* "/megatest.db")) @@ -2000,11 +2010,11 @@ (set! *didsomething* #t)) (begin (debug:print-error 1 *default-log-port* "Path " source " not found") (exit 1)))))) ;; else do a general-run-call - (if (and (args:get-arg "-archive") (not (equal? (args:get-arg "-archive") "replicacte-db"))) + (if (and (args:get-arg "-archive") (not (equal? (args:get-arg "-archive") "replicate-db"))) (begin ;; for the archive get we need to preserve the starting dir as part of the target path (if (and (args:get-arg "-dest") (not (equal? (substring (args:get-arg "-dest") 0 1) "/"))) (let ((newpath (conc (current-directory) "/" (args:get-arg "-dest")))) Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -1323,11 +1323,13 @@ (begin (print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"") res))))) (conc "megatest " (if (not (member action '("sync"))) (conc action " " action-param) - "")) + "") (if (member action '("-run" "-rerun-clean" "-rerun-all" "-kill-rerun")) + "-rerun DEAD,ABORT,KILLED" + "")) pkta))) ;; (use trace)(trace pkt->cmdline) (define (write-pkt pktsdir uuid pkt) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -392,11 +392,13 @@ (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) - + + + (define (runs:run-post-hook run-id) (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) (existing-tests (if run-post-hook (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses #f #f ;; offset limit @@ -431,10 +433,65 @@ (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) (system (conc run-post-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + +(define (runs:rerun-hook test-id new-test-path testdat rerunlst) + (let* ((rerun-hook (configf:lookup *configdat* "runs" "rerun-hook")) + (log-dir (conc *toppath* "/reruns/logs")) + (target (getenv "MT_TARGET")) + (runname (common:args-get-runname)) + (rundir (db:test-get-rundir testdat)) + (tarfiledir (conc *toppath* "/reruns")) + (status (db:test-get-status testdat)) + (comment (conc "\"" (db:test-get-comment testdat) "\"" )) + (testname (db:test-get-testname testdat)) + (itempath (db:test-get-item-path testdat)) + (file-body (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) ""))) + (log-file (conc file-body ".log")) + ;; (log-file (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".log")) + (full-log-fname (conc log-dir "/" log-file)) + (tarfilename (conc file-body ".tar")) + ;; (tarfilename (conc status "." (string-translate target "/" "-") "." runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".tar")) + ) + (if rerun-hook + (let* ((use-log-dir (if (not (directory-exists? log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file)) + (sys-call-text (conc rerun-hook " " tarfilename " " rundir " " actual-logf " " runname " " tarfiledir " " status " " target " " comment " " testname " " itempath " >> " actual-logf " 2>&1")) + ) + (debug:print 2 *default-log-port* "Found rerun-hook in config:" rerun-hook) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (debug:print 0 *default-log-port* "ERROR: failed to run rerun-hook " rerun-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running rerun-hook: \"" rerun-hook "\", log is " actual-logf) + ;; call the hook + (debug:print-info 0 *default-log-port* "Calling rerun-hook for " test-id new-test-path testdat rerunlst) + (debug:print-info 0 *default-log-port* "rerun hook: " rerun-hook) + (debug:print-info 0 *default-log-port* "tarfilename: " tarfilename) + (debug:print-info 0 *default-log-port* "rundir: " rundir) + (debug:print-info 0 *default-log-port* "actual-logf: " actual-logf) + (debug:print-info 0 *default-log-port* "runname: " runname) + (debug:print-info 0 *default-log-port* "sys-call-text: " sys-call-text) + (system sys-call-text) + (debug:print-info 0 *default-log-port* "rerun-hook \"" rerun-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + + ;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise. (define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon) (null? (tests:filter-test-names-not-matched waitors-upon test-patt))) @@ -602,11 +659,11 @@ (runs:update-all-test_meta #f) ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) - ;; mark all test launced flag as false in the meta table + ;; mark all test launched flag as false in the meta table (rmt:set-var (conc "lunch-complete-" run-id) "no") (debug:print-info 1 *default-log-port* "Setting end-of-run to no") (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (config-rerun-cnt (if config-reruns @@ -764,11 +821,11 @@ (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) - (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) + (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") @@ -2022,25 +2079,34 @@ (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) (member (test:get-state testdat) '("COMPLETED")))) (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) (set! runflag #f)) + ;; -rerun and status is one of the specifed, run it ((and rerun (let* ((rerunlst (string-split rerun ",")) (must-rerun (member (test:get-status testdat) rerunlst))) (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) must-rerun)) (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) - (set! runflag #t)) - ;; -keepgoing, do not rerun FAIL + (set! runflag #t) + (debug:print-info 2 *default-log-port* "Calling rerun hook") + (runs:rerun-hook test-id new-test-path testdat rerun) + ) + + + + ;; -keepgoing, do not rerun FAIL ((and keepgoing (member (test:get-status testdat) '("FAIL"))) (set! runflag #f)) - ((and (not rerun) + + ((and (not rerun) (member (test:get-status testdat) '("FAIL" "n/a"))) (set! runflag #t)) + (else (set! runflag #f))) (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) (if (not runflag) (if (not parent-test) (if (runs:lownoise (conc "not starting test" full-test-name) 60) @@ -2202,32 +2268,38 @@ ;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print))) (let* ((runs-ht (runs:get-hash-by-target target-patts runpatt)) (age (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f)) (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400))) - (precmd (or (args:get-arg "-precmd") ""))) - (print "Actions: " actions) - (for-each - (lambda (target) - (let* ((runs (hash-table-ref runs-ht target)) - (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) - (to-remove (let* ((len (length sorted)) - (trim-amt (- len num-to-keep))) - (if (> trim-amt 0) - (take sorted trim-amt) - '())))) - (hash-table-set! runs-ht target to-remove) - (print target ":") - (for-each - (lambda (run) - (let ((remove (member run to-remove (lambda (a b) - (eq? (simple-run-id a) - (simple-run-id b)))))) - (if (and age (> (simple-run-event_time run) age-mark)) - (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) - (for-each - (lambda (action) + (precmd (or (args:get-arg "-precmd") "")) + (action-chk (member (string->symbol "remove-runs") actions))) + ;; check the sequence of actions archive must comme before remove-runs + (if (and action-chk (member (string->symbol "archive") action-chk)) + (begin + (debug:print-error 0 *default-log-port* "action remove-runs must come after archive") + (exit 1))) + (print "Actions: " actions " age: " age) + (for-each + (lambda (action) + (for-each + (lambda (target) + (let* ((runs (hash-table-ref runs-ht target)) + (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) + (to-remove (let* ((len (length sorted)) + (trim-amt (- len num-to-keep))) + (if (> trim-amt 0) + (take sorted trim-amt) + '())))) + (hash-table-set! runs-ht target to-remove) + (print target ":") + (for-each + (lambda (run) + (let ((remove (member run to-remove (lambda (a b) + (eq? (simple-run-id a) + (simple-run-id b)))))) + (if (and age (> (simple-run-event_time run) age-mark)) + (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) (case action ((print) (print " " (simple-run-runname run) " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") " " (if remove "REMOVE" ""))) @@ -2237,17 +2309,14 @@ " -kill-wait 0" ""))))) ((archive) (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))) ((kill-runs) - (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) - )) - actions)))) - sorted))) - ;; (print "Sorted: " (map simple-run-event_time sorted)) - ;; (print "Remove: " (map simple-run-event_time to-remove)))) - (hash-table-keys runs-ht)) + (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))))))) + sorted))) + (hash-table-keys runs-ht))) + actions) runs-ht)) (define (remove-last-path-directory path-in) (let* ((dparts (string-split path-in "/")) (path-out (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/"))) @@ -2255,29 +2324,10 @@ path-out ) ) -;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) -;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep))) -;; (for-each -;; (lambda (target) -;; (let ((runs-to-remove (hash-table-ref data target ))) -;; (for-each -;; (lambda (run) -;; (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")) -;; runs-to-remove))) -;; (hash-table-keys data)))) - -;; Remove runs -;; fields are passing in through -;; action: -;; 'remove-runs -;; 'set-state-status -;; -;; NB// should pass in keys? -;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) ;; (tdbdat (tasks:open-db)) (keys (rmt:get-keys)) @@ -2511,11 +2561,11 @@ (begin (let ((rundir (db:test-get-rundir new-test-dat))) (if (and (not (string= rundir "/tmp/badname")) (file-exists? rundir) (substring-index run-name rundir) - (substring-index target rundir) + (tests:glob-like-match (conc "%/" target "/%") rundir) ) (begin (set! lasttpath (db:test-get-rundir new-test-dat)) ;; remember this path for run removal (set! lastrealpath (remove-last-path-directory (resolve-pathname lasttpath))) (hash-table-set! run-paths-hash lastrealpath 1) @@ -2524,11 +2574,13 @@ (begin (debug:print 2 *default-log-port* "Not removing directory " rundir " because either it doesn't exist or has a bad name") (debug:print 2 *default-log-port* "Is /tmp/badname: " (string= rundir "/tmp/badname")) (debug:print 2 *default-log-port* "Exists: " (file-exists? rundir)) (debug:print 2 *default-log-port* "Has run-name: " (substring-index run-name rundir)) - (debug:print 2 *default-log-port* "Has target: " (substring-index target rundir)) + (debug:print 2 *default-log-port* "Has target: " (tests:glob-like-match (conc "%/" target "/%") rundir)) + (debug:print 2 *default-log-port* "Target: " target) + ;;PJH remove record from db no need to cleanup directory (case mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) Index: sauth-common.scm ================================================================== --- sauth-common.scm +++ sauth-common.scm @@ -240,11 +240,20 @@ (set! obj data-row)))) ;(print obj) obj)) +(define (sauth-common:src-size path) + (let ((output (with-input-from-pipe (conc "/usr/bin/du -s " path " | awk '{print $1}'") + (lambda() + (read-line))))) + (string->number output))) +(define (sauth-common:space-left-at-dest path) + (let* ((output (run/string (pipe (df ,path ) (tail -1)))) + (size (caddr (cdr (string-split output " "))))) + (string->number size))) ;; function to validate the users input for target path and resolve the path ;; TODO: Check for restriction in subpath (define (sauth-common:resolve-path new current allowed-sheets) (let* ((target-path (append current (string-split new "/"))) @@ -279,11 +288,11 @@ (if (and (not (equal? restricted-areas "" )) (string-match (regexp restrictions) target-path)) (begin - (sauth:print-error "Access denied to " (string-join resolved-path "/")) + (sauth:print-error (conc "Access denied to " (string-join resolved-path "/"))) ;(exit 1) #f) target-path) )) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -167,11 +167,13 @@ ;; given a path to a server log return: host port startseconds ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let (define (server:logf-get-start-info logf) - (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id + (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id + (dbprep-rx (regexp "^SERVER: dbprep")) + (dbprep-found 0)) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server @@ -179,11 +181,16 @@ logf (lambda () (let loop ((inl (read-line)) (lnum 0)) (if (not (eof-object? inl)) - (let ((mlst (string-match rx inl))) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl)) + ) + (if dbprep + (set! dbprep-found 1) + ) (if (not mlst) (if (< lnum 500) ;; give up if more than 500 lines of server log read (loop (read-line)(+ lnum 1)) (begin (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) @@ -192,11 +199,17 @@ (list (car dat) ;; host (string->number (cadr dat)) ;; port (string->number (caddr dat)) (cadr (cddr dat)))))) (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) + (if dbprep-found + (begin + (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds)) + (thread-sleep! 25) + ) + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) + ) (list #f #f #f #f))))))))) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; @@ -214,15 +227,21 @@ (create-directory (conc areapath "/logs") #t) (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) (directory-exists? (conc areapath "/logs"))) '())) - (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) + + ;; Get the list of server logs that do not contain "exiting". Ignore logs for servers that have exited. + (let* ((server-logs-cmd (conc "grep -iL exiting " areapath "/logs/server-*-*.log")) + (server-logs (string-split (string-chomp (call-with-input-pipe server-logs-cmd read-all)))) (num-serv-logs (length server-logs))) - (if (null? server-logs) - '() - (let loop ((hed (car server-logs)) + (if (or (null? server-logs) (= num-serv-logs 0)) + (let () + (debug:print 1 *default-log-port* "There are no servers running") + '() + ) + (let loop ((hed (string-chomp (car server-logs))) (tal (cdr server-logs)) (res '())) (let* ((mod-time (handle-exceptions exn (begin @@ -243,11 +262,11 @@ (if (null? tal) (if (and limit (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) - (loop (car tal)(cdr tal) new-res))))))))) + (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) (define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) @@ -359,10 +378,11 @@ (let* ((fmodtime (file-modification-time start-flag)) (delta (- (current-seconds) fmodtime)) (all-go (> delta reftime))) (if (and all-go (begin + (debug:print-info 0 *default-log-port* "Writing " start-flag) (with-output-to-file start-flag (lambda () (print server-key))) (thread-sleep! 0.25) (let ((res (with-input-from-file start-flag @@ -395,10 +415,11 @@ (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously (lock-file (conc areapath "/logs/server-start.lock"))) (if (> (- (current-seconds) when-run) run-delay) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 15) + (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) (system (conc "touch " start-flag)) ;; lazy but safe (server:run areapath) (thread-sleep! 2) ;; don't release the lock for at least a few seconds (common:simple-file-release-lock lock-file))) (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) @@ -464,11 +485,11 @@ (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) #f) - (match-let (((mod-time hostname port start-time pid) + (match-let (((mod-time hostname port start-time server-id pid) servr)) (tasks:kill-server hostname pid)))) ;; called in megatest.scm, host-port is string hostname:port ;; Index: spublish.scm ================================================================== --- spublish.scm +++ spublish.scm @@ -391,10 +391,14 @@ ((not (file-exists? target-path)) (sauth:print-error (conc " target Directory " target-path " does not exist!!"))) ((not (file-exists? src-path)) (sauth:print-error (conc "Source path " src-path " does not exist!!" ))) (else + (if (< (sauth-common:space-left-at-dest target-path) (sauth-common:src-size src-path)) + (begin + (sauth:print-error "Destination does not have enough disk space.") + (exit 1))) (if (is_directory src-path) (begin (let* ((parent-dir src-path) (start-dir target-path)) (run (pipe Index: sretrieve.scm ================================================================== --- sretrieve.scm +++ sretrieve.scm @@ -697,11 +697,12 @@ ;;do nothing for dirs) ) (else (if (not (string-match (regexp exclude) p )) - (print (string-substitute (conc base_path "/") "" p "-")))))))) + (print (string-substitute (conc base_path "/") "" p "-")))))) + dotfiles: #t)) (define (sretrieve:shell-help) (conc "Usage: " *exe-name* " [action [params ...]] ls [target path] : list contents of target area. The output of the cmd can be piped into other system cmd. eg ls | grep txt @@ -1081,12 +1082,12 @@ (define (main) (let* ((args (argv)) (prog (car args)) (rema (cdr args)) (exe-name (pathname-file (car (argv)))) - (exe-dir (or (pathname-directory prog) - (sretrieve:find exe-name (string-split (get-environment-variable "PATH") ":")))) + ;(exe-dir (or (pathname-directory prog) + ; (sretrieve:find exe-name (string-split (get-environment-variable "PATH") ":")))) ;(configdat (sretrieve:load-config exe-dir exe-name)) ) ;; preserve the exe data in the config file ;(hash-table-set! configdat "exe-info" (list (list "exe-name" exe-name) ; (list "exe-dir" exe-dir))) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -66,12 +66,12 @@ '())))) (filter (lambda (d) (if (directory-exists? d) d (begin - (if (common:low-noise-print 60 "tests:get-tests-search-path" d) - (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) + ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d) + ;; (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) #f))) (append paths (list (conc *toppath* "/tests")))))) (define (tests:get-valid-tests test-registry tests-paths) (if (null? tests-paths) Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -19,10 +19,11 @@ prefix=$1 cmd=$2 target=$3 cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh" +libdir="$prefix/bin/.$(lsb_release -sr)/lib" # we wish to create a var in cfg.sh for finding sqlite3 executable chicken_bin_dir=$(dirname $(which csi)) if [[ -e $chicken_bin_dir/sqlite3 ]];then sqlite3_exe=$chicken_bin_dir/sqlite3 @@ -30,19 +31,20 @@ sqlite3_exe=$(which sqlite3) fi if [ "$LD_LIBRARY_PATH" != "" ];then echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2 + echo "INFO: Writing $cfgfile" >&2 ( cat << __EOF if [ -z \$MT_ORIG_ENV ]; then export MT_ORIG_ENV=\$( $prefix/bin/serialize-env ) fi if [ "\$LD_LIBRARY_PATH" != "" ];then - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH:$libdir else - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$libdir fi export MT_SQLITE3_EXE=$sqlite3_exe __EOF ) > $cfgfile