Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -179,18 +179,21 @@ mofiles/commonmod.o : megatest-fossil-hash.scm mofiles/stml2.o \ mofiles/mtargs.o mofiles/pkts.o mofiles/mtconfigf.o \ mofiles/processmod.o mofiles/pgdbmod.o : mofiles/commonmod.o mofiles/dbmod.o : mofiles/commonmod.o mofiles/keysmod.o \ - mofiles/tasksmod.o mofiles/odsmod.o mofiles/testsmod.o + mofiles/tasksmod.o mofiles/odsmod.o mofiles/tasksmod.o : mofiles/commonmod.o mofiles/pgdbmod.o mofiles/rmtmod.o : mofiles/commonmod.o \ mofiles/apimod.o mofiles/ulex.o mofiles/itemsmod.o mofiles/apimod.o : mofiles/dbmod.o mofiles/commonmod.o mofiles/servermod.o -mofiles/runsmod.o : mofiles/testsmod.o -mofiles/mtmod.o : mofiles/mtconfigf.o mofiles/rmtmod.o +mofiles/runsmod.o : mofiles/testsmod.o mofiles/mtmod.o +mofiles/mtmod.o : mofiles/mtconfigf.o mofiles/rmtmod.o mofiles/tasksmod.o \ + mofiles/dbmod.o mofiles/pgdbmod.o mofiles/launchmod.o mofiles/servermod.o : mofiles/commonmod.o mofiles/dbmod.o +mofiles/testsmod.o : mofiles/servermod.o mofiles/dbmod.o +mofiles/launchmod.o : mofiles/subrunmod.o # Removed from megamod.o dep: mofiles/ftail.o mofiles/megamod.o : \ mofiles/rmtmod.o \ mofiles/commonmod.o \ Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -80,12 +80,12 @@ ;;=============================================== ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS - ((start-server) (apply server:kind-run params)) - ((kill-server) (set! *server-run* #f)) + ;; ((start-server) (apply server:kind-run params)) + ;; ((kill-server) (set! 
*server-run* #f)) ;; TESTS ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -69,10 +69,14 @@ (reachable #f) (last-update 0) (last-used 0) (last-cpuload 1)) +(define (common:run-sync?) + (and (common:on-homehost?) + (args:get-arg "-server"))) + ;; GLOBALS ;; CONTEXTS Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -21,11 +21,11 @@ (declare (unit dbmod)) (declare (uses commonmod)) (declare (uses keysmod)) (declare (uses tasksmod)) (declare (uses odsmod)) -(declare (uses testsmod)) +;; (declare (uses testsmod)) (declare (uses mtargs)) (declare (uses mtconfigf)) ;; (declare (uses servermod)) (module dbmod @@ -44,11 +44,11 @@ (import commonmod) (import keysmod) (import files) (import tasksmod) (import odsmod) -(import testsmod) +;; (import testsmod) (import (prefix mtargs args:)) (import (prefix mtconfigf configf:)) ;; (import servermod) ;; (use (prefix ulex ulex:)) @@ -90,10 +90,245 @@ ;; (defstruct dbr:counts (state #f) (status #f) (count 0)) + +;; more crap. + + + + +(define (tests:get-tests-search-path cfgdat) + (let ((paths (let ((section (if cfgdat + (configf:get-section cfgdat "tests-paths") + #f))) + (if section + (map cadr section) + '())))) + (filter (lambda (d) + (if (directory-exists? 
d) + d + (begin + (if (common:low-noise-print 60 "tests:get-tests-search-path" d) + (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) + #f))) + (append paths (list (conc *toppath* "/tests")))))) + +(define (mt:lazy-read-test-config test-name) + (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) + (if tconf + tconf + (let ((test-dirs (tests:get-tests-search-path *configdat*))) + (let loop ((hed (car test-dirs)) + (tal (cdr test-dirs))) + ;; Setting MT_LINKTREE here is almost certainly unnecessary. + (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) + (if (and (common:file-exists? tconfig-file) + (file-read-access? tconfig-file)) + (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) + (old-link-tree (get-environment-variable "MT_LINKTREE"))) + (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) + (let ((newtcfg (configf:read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] + (hash-table-set! *testconfigs* test-name newtcfg) + (if old-link-tree + (setenv "MT_LINKTREE" old-link-tree) + (unsetenv "MT_LINKTREE")) + newtcfg)) + (if (null? tal) + (begin + (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name) + #f) + (loop (car tal)(cdr tal)))))))))) + +;; if itempath is #f then look only at the testname part +;; +(define (tests:match->sqlqry patterns) + (if (string? patterns) + (let ((patts (string-split patterns ","))) + (if (null? 
patts) ;;; no pattern(s) means no match, we will do no query + #f + (let loop ((patt (car patts)) + (tal (cdr patts)) + (res '())) + ;; (print "loop: patt: " patt ", tal " tal) + (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) + (test-patt (cadr patt-parts)) + (item-patt (cadddr patt-parts)) + (test-qry (db:patt->like "testname" test-patt)) + (item-qry (db:patt->like "item_path" item-patt)) + (qry (conc "(" test-qry " AND " item-qry ")"))) + ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) + (if (null? tal) + (string-intersperse (append (reverse res)(list qry)) " OR ") + (loop (car tal)(cdr tal)(cons qry res))))))) + #f)) + +;; keys list to key1,key2,key3 ... +(define (runs:get-std-run-fields keys remfields) + (let* ((header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ",")))) + (list keystr header))) + +;; A routine to map itempaths using a itemmap +;; patha and pathb must be strings or this will fail +;; +;; path-b is waiting on path-a +;; +(define (db:compare-itempaths test-b-name path-a path-b itemmaps ) + (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps) + (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name))) + (if itemmap + (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap))) + (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped) + (equal? path-a path-b-mapped)) + (equal? path-b path-a)))) + +;;====================================================================== +;; Run keys, these are used to hierarchially organise tests and run areas +;;====================================================================== + +(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... 
+ (string-intersperse keys ",")) + +;;====================================================================== +;; config file related routines +;;====================================================================== + +(define keys:config-get-fields common:get-fields) +(define (keys:make-key/field-string confdat) + (let ((fields (configf:get-section confdat "fields"))) + (string-join + (map (lambda (field)(conc (car field) " " (cadr field))) + fields) + ","))) + +;; given the keys (a list of vectors or a list of keys) and a target return a keyval list +;; keyval list ( (key1 val1) (key2 val2) ...) +(define (keys:target->keyval keys target) + (let* ((targlist (string-split target "/")) + (numkeys (length keys)) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "")) + targlist))) + (map (lambda (key targ) + (list key targ)) + keys targtweaked))) + +;; patterns are: +;; "rx1" "replacement1"\n +;; "rx2" "replacement2" +;; etc. +;; +(define (db:multi-pattern-apply item-path itemmap) + (let ((all-patts (string-split itemmap "\n"))) + (if (null? all-patts) + item-path + (let loop ((hed (car all-patts)) + (tal (cdr all-patts)) + (res item-path)) + (let* ((parts (string-split hed)) + (patt (car parts)) + + (repl (if (> (length parts) 1)(cadr parts) "")) + + (newr (if (and patt repl) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* + "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) + res) + (string-substitute patt repl res)) + + + ) + (begin + (debug:print 0 *default-log-port* + "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) + res)))) + (if (null? tal) + newr + (loop (car tal)(cdr tal) newr))))))) + +;; given a list of itemmaps (testname . 
map), return the first match +;; +(define (tests:lookup-itemmap itemmaps testname) + (let ((best-matches (filter (lambda (itemmap) + (tests:match (car itemmap) testname #f)) + itemmaps))) + (if (null? best-matches) + #f + (let ((res (car best-matches))) + ;; (debug:print 0 *default-log-port* "res=" res) + (cond + ((string? res) res) ;;; FIX THE ROOT CAUSE HERE .... + ((null? res) #f) + ((string? (cdr res)) (cdr res)) ;; it is a pair + ((string? (cadr res))(cadr res)) ;; it is a list + (else cadr res)))))) + +;; make a query (fieldname like 'patt1' OR fieldname +(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) + (let ((patts (if (string? pattstr) + (string-split pattstr ",") + '("%")))) + (string-intersperse (map (lambda (patt) + (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) + (conc fieldname " " wildtype " '" patt "'"))) + (if (null? patts) + '("") + patts)) + comparator))) + +;; if itempath is #f then look only at the testname part +;; +(define (tests:match patterns testname itempath #!key (required '())) + (if (string? patterns) + (let ((patts (append (string-split patterns ",") required))) + (if (null? patts) ;;; no pattern(s) means no match + #f + (let loop ((patt (car patts)) + (tal (cdr patts))) + ;; (print "loop: patt: " patt ", tal " tal) + (if (string=? patt "") + #f ;; nothing ever matches empty string - policy + (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) + (test-patt (cadr patt-parts)) + (item-patt (cadddr patt-parts))) + ;; special case: test vs. test/ + ;; test => "test" "%" + ;; test/ => "test" "" + (if (and (not (substring-index "/" patt)) ;; no slash in the original + (or (not item-patt) + (equal? item-patt ""))) ;; should always be true that item-patt is "" + (set! 
item-patt "%")) + ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) + (if (and (tests:glob-like-match test-patt testname) + (or (not itempath) + (tests:glob-like-match (if item-patt item-patt "") itempath))) + #t + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))))))) + +;; tests:glob-like-match +(define (tests:glob-like-match patt str) + (let ((like (substring-index "%" patt))) + (let* ((notpatt (equal? (substring-index "~" patt) 0)) + (newpatt (if notpatt (substring patt 1) patt)) + (finpatt (if like + (string-substitute (regexp "%") ".*" newpatt #f) + (string-substitute (regexp "\\*") ".*" newpatt #f))) + (res #f)) + ;; (print "tests:glob-like-match => notpatt: " notpatt ", newpatt: " newpatt ", finpatt: " finpatt) + (set! res (string-match (regexp finpatt (if like #t #f)) str)) + (if notpatt (not res) res)))) + ;;====================================================================== ;; SQLITE3 HELPERS ;;====================================================================== Index: docs/code/module-hierarchy.dot ================================================================== --- docs/code/module-hierarchy.dot +++ docs/code/module-hierarchy.dot @@ -23,10 +23,11 @@ subgraph cluster_megatest { label="megatest"; rmtmod [label="rmt mod"]; + // httpmod [label="http-transportmod"]; // commonmod mtargs -> commonmod; pkts -> commonmod; mtconfigf -> commonmod; processmod -> commonmod; @@ -44,11 +45,10 @@ // dbmod commonmod -> dbmod; keysmod -> dbmod; tasksmod -> dbmod; odsmod -> dbmod; - testsmod -> dbmod; mtargs -> dbmod; mtconfigf -> dbmod; // tasksmod commonmod -> tasksmod; pgdbmod -> tasksmod; @@ -56,14 +56,22 @@ // odsmod commonmod -> odsmod; // testsmod commonmod -> testsmod; mtargs -> testsmod; + servermod -> testsmod; + mtconfigf -> testsmod; + itemsmod -> testsmod; + dbmod -> testsmod; // mtmod mtconfigf -> mtmod; commonmod -> mtmod; rmtmod -> mtmod; + servermod -> mtmod; + taskmod -> mtmod; + mtargs -> 
mtmod; + launchmod -> mtmod; // runsmod mtmod -> runsmod; commonmod -> runsmod; testsmod -> runsmod; // pgdbmod @@ -70,12 +78,21 @@ commonmod -> pgdbmod; mtconfigf -> pgdbmod; //servermod commonmod -> servermod; dbmod -> servermod; - // processmod - - + mtconfigf -> servermod; + mtargs -> servermod; + tasksmod -> servermod; + // launchmod + commonmod -> launchmod; + rmtmod -> launchmod; + testsmod -> launchmod; + mtconfigf -> launchmod; + mtargs -> launchmod; + subrunmod -> launchmod; + // subrun + } } DELETED http-transport-inc.scm Index: http-transport-inc.scm ================================================================== --- http-transport-inc.scm +++ /dev/null @@ -1,680 +0,0 @@ - -;; Copyright 2006-2012, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . 
- -(define (http-transport:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; S E R V E R -;; ====================================================================== - -;; Call this to start the actual server -;; - -(define *db:process-queue-mutex* (make-mutex)) - -(define (http-transport:run hostn) - ;; Configurations for server - (tcp-buffer-size 2048) - (max-connections 2048) - (debug:print 2 *default-log-port* "Attempting to start the server ...") - (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (start-port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.server-start"))) - (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) - ;; set some parameters for the server - (root-path (if link-tree-path - link-tree-path - (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! - (handle-directory spiffy-directory-listing) - (handle-exception (lambda (exn chain) - (signal (make-composite-condition - (make-property-condition - 'server - 'message "server error"))))) - - ;; http-transport:handle-directory) ;; simple-directory-handler) - ;; Setup the web server and a /ctrl interface - ;; - (vhost-map `(((* any) . ,(lambda (continue) - ;; open the db on the first call - ;; This is were we set up the database connections - (let* (($ (request-vars source: 'both)) - (dat ($ 'dat)) - (res #f)) - (cond - ((equal? 
(uri-path (request-uri (current-request))) - '(/ "api")) - (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc - headers: '((content-type text/plain))) - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*)) - ((equal? (uri-path (request-uri (current-request))) - '(/ "")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "json_api")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "runs")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ any)) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "hey")) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "jquery3.1.0.js")) - (send-response body: (http-transport:show-jquery) - headers: '((content-type application/javascript)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "test_log")) - (send-response body: (http-transport:html-test-log $) - headers: '((content-type text/HTML)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "dashboard")) - (send-response body: (http-transport:html-dboard $) - headers: '((content-type text/HTML)))) - (else (continue)))))))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - (http-transport:try-start-server ipaddrstr start-port))) - -;; This is recursively run by http-transport:run until sucessful -;; -(define (http-transport:try-start-server ipaddrstr portnum) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (config-use-proxy (equal? 
(configf:lookup *configdat* "client" "use-http_proxy") "yes"))) - (if (not config-use-proxy) - (determine-proxy (constantly #f))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 0.1) - - ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (if config-hostname ;; this is a hint to bind directly - (start-server port: portnum bind-address: (if (equal? 
config-hostname "-") - ipaddrstr - config-hostname)) - (start-server port: portnum)) - (portlogger:open-run-close portlogger:set-port portnum "released") - (debug:print 1 *default-log-port* "INFO: server has been stopped")))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;;====================================================================== -;; C L I E N T S -;;====================================================================== - -(define *http-mutex* (make-mutex)) - -;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here -;; I'm pretty sure it is defunct. - -;; This next block all imported en-mass from the api branch -(define *http-requests-in-progress* 0) -(define *http-connections-next-cleanup* (current-seconds)) - -(define (http-transport:get-time-to-cleanup) - (let ((res #f)) - (mutex-lock! *http-mutex*) - (set! res (> (current-seconds) *http-connections-next-cleanup*)) - (mutex-unlock! *http-mutex*) - res)) - -(define (http-transport:inc-requests-count) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) - ;; Use this opportunity to slow things down iff there are too many requests in flight - (if (> *http-requests-in-progress* 5) - (begin - (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") - (thread-sleep! 1))) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count proc) - (mutex-lock! *http-mutex*) - (proc) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count-and-close-all-connections) - (set! 
*http-requests-in-progress* (- *http-requests-in-progress* 1)) - (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds - (if (> *http-requests-in-progress* 0) - (if (> etime (current-seconds)) - (begin - (thread-sleep! 0.05) - (loop etime)) - (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) - (close-all-connections!))) - (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:inc-requests-and-prep-to-close-all-connections) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) - -;; Send "cmd" with json payload "params" to serverdat and receive result -;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) - (let* ((fullurl (if (vector? serverdat) - (http-transport:server-dat-get-api-req serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1)))) - (res (vector #f "uninitialized")) - (success #t) - (sparams (db:obj->string params transport: 'http)) - (runremote (or area-dat *runremote*))) - (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") - ;; set up the http-client here - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - ;; send the data and get the response - ;; extract the needed info from the http data and - ;; process and return it. - (let* ((send-recieve (lambda () - (mutex-lock! *http-mutex*) - ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) - ;; ((exn http client-error) e (print e))) - (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! 
NOTE: consider moving this to client.scm since we are only supporting http transport at this time. - success - (db:string->obj - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (set! success #f) - (if (debug:debug-mode 1) - (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") - (begin - (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " msg) - (debug:print 0 *default-log-port* " cmd: " cmd " params: " params) - (debug:print 0 *default-log-port* " call-chain: " call-chain))) - (if runremote - (remote-conndat-set! runremote #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? - (mutex-unlock! *http-mutex*) - ;;; (signal (make-composite-condition - ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;;; "communications failed" - (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key (or *server-id* "thekey")) - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) - transport: 'http) - 0)) ;; added this speculatively - ;; Shouldn't this be a call to the managed call-all-connections stuff above? - (close-all-connections!) - (mutex-unlock! *http-mutex*) - )) - (time-out (lambda () - (thread-sleep! 45) - #f)) - (th1 (make-thread send-recieve "with-input-from-request")) - (th2 (make-thread time-out "time out"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - (thread-terminate! th2) - (debug:print-info 11 *default-log-port* "got res=" res) - (if (vector? res) - (if (vector-ref res 0) ;; this is the first flag or the second flag? - res ;; this is the *inner* vector? seriously? why? 
- (if (debug:debug-mode 11) - (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it - (print-call-chain (current-error-port)) - (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 11 *default-log-port* " server call chain:") - (pp (vector-ref res 1) (current-error-port)) - (signal (vector-ref res 0))) - res)) - (signal (make-composite-condition - (make-property-condition - 'timeout - 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) - -;; careful closing of connections stored in *runremote* -;; -(define (http-transport:close-connections #!key (area-dat #f)) - (let* ((runremote (or area-dat *runremote*)) - (server-dat (if runremote - (remote-conndat runremote) - #f))) ;; (hash-table-ref/default *runremote* run-id #f))) - (if (vector? server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) - (close-connection! api-dat) - ;;(close-idle-connections!) 
- #t)) - #f))) - -;; Moved to commonmod -;; -;; (define (make-http-transport:server-dat)(make-vector 6)) -;; (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) -;; (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) -;; (define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) -;; (define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) -;; (define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) -;; (define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) -;; (define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) -;; -;; (define (http-transport:server-dat-make-url vec) -;; (if (and (http-transport:server-dat-get-iface vec) -;; (http-transport:server-dat-get-port vec)) -;; (conc "http://" -;; (http-transport:server-dat-get-iface vec) -;; ":" -;; (http-transport:server-dat-get-port vec)) -;; #f)) -;; -;; (define (http-transport:server-dat-update-last-access vec) -;; (if (vector? vec) -;; (vector-set! vec 5 (current-seconds)) -;; (begin -;; (print-call-chain (current-error-port)) -;; (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) - -;; -;; connect -;; -(define (http-transport:client-connect iface port) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) - server-dat)) - -;; run http-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. 
-;; -(define (http-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((sdat #f) - (tmp-area (common:get-db-tmp-area)) - (started-file (conc tmp-area "/.server-started")) - (server-start-time (current-seconds)) - (server-info (let loop ((start-time (current-seconds)) - (changed #t) - (last-sdat "not this")) - (begin ;; let ((sdat #f)) - (thread-sleep! 0.01) - (debug:print-info 0 *default-log-port* "Waiting for server alive signature") - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if (and sdat - (not changed) - (> (- (current-seconds) start-time) 2)) - (begin - (debug:print-info 0 *default-log-port* "Received server alive signature") - (common:save-pkt `((action . alive) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat))) - *configdat* #t) - sdat) - (begin - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (begin - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (common:save-pkt `((action . died) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat)) - (msg . "Transport died?")) - *configdat* #t) - (exit)) - (loop start-time - (equal? 
sdat last-sdat) - sdat))))))) - (iface (car server-info)) - (port (cadr server-info)) - (last-access 0) - (server-timeout (server:expiration-timeout)) - (server-going #f) - (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - - (with-output-to-file started-file (lambda ()(print (current-process-id)))) - - (let loop ((count 0) - (server-state 'available) - (bad-sync-count 0) - (start-time (current-milliseconds))) - ;; Use this opportunity to sync the tmp db to megatest.db - (if (not server-going) ;; *dbstruct-db* - (begin - (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-db* (db:setup #t)) ;; run-id)) - (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. - (thread-start! *watchdog*))) - - ;; when things go wrong we don't want to be doing the various queries too often - ;; so we strive to run this stuff only every four seconds or so. - (let* ((sync-time (- (current-milliseconds) start-time)) - (rem-time (quotient (- 4000 sync-time) 1000))) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time))) - - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) - - ;; Check that iface and port have not changed (can happen if server port collides) - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - - (if (not (equal? sdat (list iface port))) - (let ((new-iface (car sdat)) - (new-port (cadr sdat))) - (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") - (set! iface new-iface) - (set! 
port new-port) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) - (flush-output *default-log-port*))) - - ;; Transfer *db-last-access* to last-access to use in checking that we are still alive - (mutex-lock! *heartbeat-mutex*) - (set! last-access *db-last-access*) - (mutex-unlock! *heartbeat-mutex*) - - (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) - (begin - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) - (flush-output *default-log-port*))) - (if (common:low-noise-print 60 "dbstats") - (begin - (debug:print 0 *default-log-port* "Server stats:") - (db:print-current-query-stats))) - (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) - (cond - ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (let ((curr-time (current-seconds))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?") - (if (not *server-overloaded*) - (change-file-times server-log-file curr-time curr-time))))) - (loop 0 server-state bad-sync-count (current-milliseconds))) - (else - (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (http-transport:server-shutdown port))))))) - -(define (http-transport:server-shutdown port) - (begin - ;;(BB> "http-transport:server-shutdown called") - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - ;; - ;; start_shutdown - ;; - (set! 
*time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up - (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 1) - - ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - ;; (debug:print-info 0 *default-log-port* "Average cached write time " - ;; (if (eq? *number-of-writes* 0) - ;; "n/a (no writes)" - ;; (/ *writes-total-delay* - ;; *number-of-writes*)) - ;; " ms") - ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - ;; (debug:print-info 0 *default-log-port* "Average non-cached time " - ;; (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - ;; " ms") - - (db:print-current-query-stats) - (common:save-pkt `((action . exit) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (exit))) - -;; all routes though here end in exit ... -;; -;; start_server? 
-;; -(define (http-transport:launch) - ;; check that a server start is in progress, pause or exit if so - (let* ((tmp-area (common:get-db-tmp-area)) - (server-start (conc tmp-area "/.server-start")) - (server-started (conc tmp-area "/.server-started")) - (start-time (common:lazy-modification-time server-start)) - (started-time (common:lazy-modification-time server-started)) - (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting - (start-time-old (> (- (current-seconds) start-time) 5)) - (cleanup-proc (lambda (msg) - (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) - (full-serv-fname (conc *toppath* "/logs/" serv-fname)) - (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) - (debug:print 0 *default-log-port* msg) - (if (common:file-exists? full-serv-fname) - (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) - (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) - (exit))))) - (if (and (not start-time-old) ;; last server start try was less than five seconds ago - (not server-starting)) - (begin - (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") - (exit))) - ;; lets not even bother to start if there are already three or more server files ready to go - (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) - (if (> num-alive 3) - (begin - (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) - (exit)))) - (common:save-pkt `((action . start) - (T . server) - (pid . 
,(current-process-id))) - *configdat* #t) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))) - -;; (define (http-transport:server-signal-handler signum) -;; (signal-mask! signum) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; (thread-sleep! 1)) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) - -;;=============================================== -;; Java script -;;=============================================== -(define (http-transport:show-jquery) - (let* ((data (tests:readlines *java-script-lib*))) -(string-join data "\n"))) - - - -;;====================================================================== -;; web pages -;;====================================================================== - -(define (http-transport:html-test-log $) - (let* ((run-id ($ 'runid)) - (test-item ($ 'testname)) - (parts (string-split test-item ":")) - (test-name (car parts)) - - (item-name (if (equal? 
(length parts) 1) - "" - (cadr parts)))) - ;(print $) -(tests:get-test-log run-id test-name item-name))) - - -(define (http-transport:html-dboard $) - (let* ((page ($ 'page)) - (oup (open-output-string)) - (bdy "--------------------------") - - (ret (tests:dynamic-dboard page))) - (s:output-new oup ret) - (close-output-port oup) - - (set! bdy (get-output-string oup)) - (conc "

Dashboard

" bdy "

" ))) - -(define (http-transport:main-page) - (let ((linkpath (root-path))) - (conc "

" (pathname-strip-directory *toppath*) "

" - "" - "Run area: " *toppath* - "

Server Stats

" - (http-transport:stats-table) - "
" - (http-transport:runs linkpath) - "
" - ;; (http-transport:run-stats) - "" - ))) - -(define (http-transport:stats-table) - (mutex-lock! *heartbeat-mutex*) - (let ((res - (conc "" - ;; "" - "" - "" - "" - ;; "" - "" - "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) - "n/a (no writes)" - (/ *writes-total-delay* - *number-of-writes*)) - " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - " ms
Last access" (seconds->time-string *db-last-access*) "
"))) - (mutex-unlock! *heartbeat-mutex*) - res)) - -(define (http-transport:runs linkpath) - (conc "

Runs

" - (string-intersperse - (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) - (map (lambda (p) - (conc "" p "
")) - files)) - " "))) - -#;(define (http-transport:run-stats) - (let ((stats (open-run-close db:get-running-stats #f))) - (conc "" - (string-intersperse - (map (lambda (stat) - (conc "")) - stats) - " ") - "
" (car stat) "" (cadr stat) "
"))) ADDED http-transportmod.scm Index: http-transportmod.scm ================================================================== --- /dev/null +++ http-transportmod.scm @@ -0,0 +1,719 @@ + +;; Copyright 2006-2012, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +(declare (unit apimod)) +(declare (uses commonmod)) +(declare (uses stml2)) +(declare (uses apimod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses mtconfigf)) +(declare (uses testsmod)) + +(module apimod + * + +(import scheme chicken data-structures extras posix files + srfi-13 srfi-18 spiffy http-client + spiffy-directory-listing + spiffy-request-vars tcp + ) +(import commonmod) +(import stml2) +(import apimod) +(import dbmod) +(import testsmod) +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) + +(define (http-transport:make-server-url hostport) + (if (not hostport) + #f + (conc "http://" (car hostport) ":" (cadr hostport)))) + +(define *server-loop-heart-beat* (current-seconds)) +(define *writes-total-delay* 0) + +;;====================================================================== +;; S E R V E R +;; ====================================================================== + +;; Call this to start the actual server +;; +;; all routes though here end in exit ... 
+;; +;; start_server +;; +(define (server:launch run-id transport-type) + ;; (case transport-type + ;; ((http) + (http-transport:launch)) + ;;((nmsg)(nmsg-transport:launch run-id)) + ;;((rpc) (rpc-transport:launch run-id)) + ;; (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) + +(define *db:process-queue-mutex* (make-mutex)) + +(define (http-transport:run hostn) + ;; Configurations for server + (tcp-buffer-size 2048) + (max-connections 2048) + (debug:print 2 *default-log-port* "Attempting to start the server ...") + (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily + (hostname (get-host-name)) + (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") + (server:get-best-guess-address hostname) + #f))) + (if ipstr ipstr hostn))) ;; hostname))) + (start-port (portlogger:open-run-close portlogger:find-port)) + (link-tree-path (common:get-linktree)) + (tmp-area (common:get-db-tmp-area)) + (start-file (conc tmp-area "/.server-start"))) + (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) + ;; set some parameters for the server + (root-path (if link-tree-path + link-tree-path + (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! + (handle-directory spiffy-directory-listing) + (handle-exception (lambda (exn chain) + (signal (make-composite-condition + (make-property-condition + 'server + 'message "server error"))))) + + ;; http-transport:handle-directory) ;; simple-directory-handler) + ;; Setup the web server and a /ctrl interface + ;; + (vhost-map `(((* any) . ,(lambda (continue) + ;; open the db on the first call + ;; This is were we set up the database connections + (let* (($ (request-vars source: 'both)) + (dat ($ 'dat)) + (res #f)) + (cond + ((equal? 
(uri-path (request-uri (current-request))) + '(/ "api")) + (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc + headers: '((content-type text/plain))) + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*)) + ((equal? (uri-path (request-uri (current-request))) + '(/ "")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "json_api")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "runs")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ any)) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "hey")) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "jquery3.1.0.js")) + (send-response body: (http-transport:show-jquery) + headers: '((content-type application/javascript)))) + #;((equal? (uri-path (request-uri (current-request))) + '(/ "test_log")) + (send-response body: (http-transport:html-test-log $) + headers: '((content-type text/HTML)))) + ;; removing dashboard due to dependency challenges + #;((equal? (uri-path (request-uri (current-request))) + '(/ "dashboard")) + (send-response body: (http-transport:html-dboard $) + headers: '((content-type text/HTML)))) + (else (continue)))))))) + (with-output-to-file start-file (lambda ()(print (current-process-id)))) + (http-transport:try-start-server ipaddrstr start-port))) + +;; This is recursively run by http-transport:run until sucessful +;; +(define (http-transport:try-start-server ipaddrstr portnum) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) + (config-use-proxy (equal? 
(configf:lookup *configdat* "client" "use-http_proxy") "yes"))) + (if (not config-use-proxy) + (determine-proxy (constantly #f))) + (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) + (handle-exceptions + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + (thread-sleep! 0.1) + + ;; get_next_port goes here + (http-transport:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (print "ERROR: Tried and tried but could not start the server")))) + ;; any error in following steps will result in a retry + (set! *server-info* (list ipaddrstr portnum)) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + ;; This starts the spiffy server + ;; NEED WAY TO SET IP TO #f TO BIND ALL + ;; (start-server bind-address: ipaddrstr port: portnum) + (if config-hostname ;; this is a hint to bind directly + (start-server port: portnum bind-address: (if (equal? 
config-hostname "-") + ipaddrstr + config-hostname)) + (start-server port: portnum)) + (portlogger:open-run-close portlogger:set-port portnum "released") + (debug:print 1 *default-log-port* "INFO: server has been stopped")))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;;====================================================================== +;; C L I E N T S +;;====================================================================== + +(define *http-mutex* (make-mutex)) + +;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here +;; I'm pretty sure it is defunct. + +;; This next block all imported en-mass from the api branch +(define *http-requests-in-progress* 0) +(define *http-connections-next-cleanup* (current-seconds)) + +(define (http-transport:get-time-to-cleanup) + (let ((res #f)) + (mutex-lock! *http-mutex*) + (set! res (> (current-seconds) *http-connections-next-cleanup*)) + (mutex-unlock! *http-mutex*) + res)) + +(define (http-transport:inc-requests-count) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) + ;; Use this opportunity to slow things down iff there are too many requests in flight + (if (> *http-requests-in-progress* 5) + (begin + (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") + (thread-sleep! 1))) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count proc) + (mutex-lock! *http-mutex*) + (proc) + (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count-and-close-all-connections) + (set! 
*http-requests-in-progress* (- *http-requests-in-progress* 1)) + (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds + (if (> *http-requests-in-progress* 0) + (if (> etime (current-seconds)) + (begin + (thread-sleep! 0.05) + (loop etime)) + (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) + (close-all-connections!))) + (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:inc-requests-and-prep-to-close-all-connections) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) + +;; Send "cmd" with json payload "params" to serverdat and receive result +;; +(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) + (let* ((fullurl (if (vector? serverdat) + (http-transport:server-dat-get-api-req serverdat) + (begin + (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") + (exit 1)))) + (res (vector #f "uninitialized")) + (success #t) + (sparams (db:obj->string params transport: 'http)) + (runremote (or area-dat *runremote*))) + (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") + ;; set up the http-client here + (max-retry-attempts 1) + ;; consider all requests indempotent + (retry-request? (lambda (request) + #f)) + ;; send the data and get the response + ;; extract the needed info from the http data and + ;; process and return it. + (let* ((send-recieve (lambda () + (mutex-lock! *http-mutex*) + ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) + ;; ((exn http client-error) e (print e))) + (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! 
NOTE: consider moving this to client.scm since we are only supporting http transport at this time. + success + (db:string->obj + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (set! success #f) + (if (debug:debug-mode 1) + (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") + (begin + (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") + (debug:print 0 *default-log-port* " message: " msg) + (debug:print 0 *default-log-port* " cmd: " cmd " params: " params) + (debug:print 0 *default-log-port* " call-chain: " call-chain))) + (if runremote + (remote-conndat-set! runremote #f)) + ;; Killing associated server to allow clean retry.") + ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? + (mutex-unlock! *http-mutex*) + ;;; (signal (make-composite-condition + ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) + ;;; "communications failed" + (db:obj->string #f)) + (with-input-from-request ;; was dat + fullurl + (list (cons 'key (or *server-id* "thekey")) + (cons 'cmd cmd) + (cons 'params sparams)) + read-string)) + transport: 'http) + 0)) ;; added this speculatively + ;; Shouldn't this be a call to the managed call-all-connections stuff above? + (close-all-connections!) + (mutex-unlock! *http-mutex*) + )) + (time-out (lambda () + (thread-sleep! 45) + #f)) + (th1 (make-thread send-recieve "with-input-from-request")) + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (thread-terminate! th2) + (debug:print-info 11 *default-log-port* "got res=" res) + (if (vector? res) + (if (vector-ref res 0) ;; this is the first flag or the second flag? + res ;; this is the *inner* vector? seriously? why? 
+ (if (debug:debug-mode 11) + (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it + (print-call-chain (current-error-port)) + (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 11 *default-log-port* " server call chain:") + (pp (vector-ref res 1) (current-error-port)) + (signal (vector-ref res 0))) + res)) + (signal (make-composite-condition + (make-property-condition + 'timeout + 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) + +;; careful closing of connections stored in *runremote* +;; +(define (http-transport:close-connections #!key (area-dat #f)) + (let* ((runremote (or area-dat *runremote*)) + (server-dat (if runremote + (remote-conndat runremote) + #f))) ;; (hash-table-ref/default *runremote* run-id #f))) + (if (vector? server-dat) + (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) + (close-connection! api-dat) + ;;(close-idle-connections!) 
+ #t)) + #f))) + +;; Moved to commonmod +;; +;; (define (make-http-transport:server-dat)(make-vector 6)) +;; (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) +;; (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) +;; (define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) +;; (define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) +;; (define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) +;; (define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) +;; (define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) +;; +;; (define (http-transport:server-dat-make-url vec) +;; (if (and (http-transport:server-dat-get-iface vec) +;; (http-transport:server-dat-get-port vec)) +;; (conc "http://" +;; (http-transport:server-dat-get-iface vec) +;; ":" +;; (http-transport:server-dat-get-port vec)) +;; #f)) +;; +;; (define (http-transport:server-dat-update-last-access vec) +;; (if (vector? vec) +;; (vector-set! vec 5 (current-seconds)) +;; (begin +;; (print-call-chain (current-error-port)) +;; (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) + +;; +;; connect +;; +(define (http-transport:client-connect iface port) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) + (api-req (make-request method: 'POST uri: api-uri)) + (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) + server-dat)) + +;; run http-transport:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. 
+;; +(define (http-transport:keep-running) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + (let* ((sdat #f) + (tmp-area (common:get-db-tmp-area)) + (started-file (conc tmp-area "/.server-started")) + (server-start-time (current-seconds)) + (server-info (let loop ((start-time (current-seconds)) + (changed #t) + (last-sdat "not this")) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (> (- (current-seconds) start-time) 2)) + (begin + (debug:print-info 0 *default-log-port* "Received server alive signature") + (common:save-pkt `((action . alive) + (T . server) + (pid . ,(current-process-id)) + (ipaddr . ,(car sdat)) + (port . ,(cadr sdat))) + *configdat* #t) + sdat) + (begin + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (sleep 4) + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (common:save-pkt `((action . died) + (T . server) + (pid . ,(current-process-id)) + (ipaddr . ,(car sdat)) + (port . ,(cadr sdat)) + (msg . "Transport died?")) + *configdat* #t) + (exit)) + (loop start-time + (equal? 
sdat last-sdat) + sdat))))))) + (iface (car server-info)) + (port (cadr server-info)) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (server-going #f) + (server-log-file (args:get-arg "-log"))) ;; always set when we are a server + + (with-output-to-file started-file (lambda ()(print (current-process-id)))) + + (let loop ((count 0) + (server-state 'available) + (bad-sync-count 0) + (start-time (current-milliseconds))) + ;; Use this opportunity to sync the tmp db to megatest.db + (if (not server-going) ;; *dbstruct-db* + (begin + (debug:print 0 *default-log-port* "SERVER: dbprep") + (set! *dbstruct-db* (db:setup #t)) ;; run-id)) + (set! server-going #t) + (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. + (thread-start! *watchdog*))) + + ;; when things go wrong we don't want to be doing the various queries too often + ;; so we strive to run this stuff only every four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) + + ;; Check that iface and port have not changed (can happen if server port collides) + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + + (if (not (equal? sdat (list iface port))) + (let ((new-iface (car sdat)) + (new-port (cadr sdat))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (set! iface new-iface) + (set! 
port new-port) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) + (flush-output *default-log-port*))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (mutex-lock! *heartbeat-mutex*) + (set! last-access *db-last-access*) + (mutex-unlock! *heartbeat-mutex*) + + (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) + (begin + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) + (flush-output *default-log-port*))) + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (let ((curr-time (current-seconds))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?") + (if (not *server-overloaded*) + (change-file-times server-log-file curr-time curr-time))))) + (loop 0 server-state bad-sync-count (current-milliseconds))) + (else + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (http-transport:server-shutdown port))))))) + +(define (http-transport:server-shutdown port) + (begin + ;;(BB> "http-transport:server-shutdown called") + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; + ;; start_shutdown + ;; + (set! 
*time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up + (portlogger:open-run-close portlogger:set-port port "released") + (thread-sleep! 1) + + ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) + ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) + ;; (debug:print-info 0 *default-log-port* "Average cached write time " + ;; (if (eq? *number-of-writes* 0) + ;; "n/a (no writes)" + ;; (/ *writes-total-delay* + ;; *number-of-writes*)) + ;; " ms") + ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) + ;; (debug:print-info 0 *default-log-port* "Average non-cached time " + ;; (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + ;; " ms") + + (db:print-current-query-stats) + (common:save-pkt `((action . exit) + (T . server) + (pid . ,(current-process-id))) + *configdat* #t) + (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") + (exit))) + +;; all routes though here end in exit ... +;; +;; start_server? 
+;; +(define (http-transport:launch) + ;; check that a server start is in progress, pause or exit if so + (let* ((tmp-area (common:get-db-tmp-area)) + (server-start (conc tmp-area "/.server-start")) + (server-started (conc tmp-area "/.server-started")) + (start-time (common:lazy-modification-time server-start)) + (started-time (common:lazy-modification-time server-started)) + (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting + (start-time-old (> (- (current-seconds) start-time) 5)) + (cleanup-proc (lambda (msg) + (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) + (full-serv-fname (conc *toppath* "/logs/" serv-fname)) + (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) + (debug:print 0 *default-log-port* msg) + (if (common:file-exists? full-serv-fname) + (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) + (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) + (exit))))) + (if (and (not start-time-old) ;; last server start try was less than five seconds ago + (not server-starting)) + (begin + (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") + (exit))) + ;; lets not even bother to start if there are already three or more server files ready to go + (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) + (if (> num-alive 3) + (begin + (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) + (exit)))) + (common:save-pkt `((action . start) + (T . server) + (pid . 
,(current-process-id))) + *configdat* #t) + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running) + "Keep running")))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit)))) + +;; (define (http-transport:server-signal-handler signum) +;; (signal-mask! signum) +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* " ... exiting ...") +;; (let ((th1 (make-thread (lambda () +;; (thread-sleep! 1)) +;; "eat response")) +;; (th2 (make-thread (lambda () +;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") +;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff +;; (debug:print 0 *default-log-port* " Done.") +;; (exit 4)) +;; "exit on ^C timer"))) +;; (thread-start! th2) +;; (thread-start! th1) +;; (thread-join! th2)))) + +;;=============================================== +;; Java script +;;=============================================== +(define (http-transport:show-jquery) + (let* ((data (tests:readlines *java-script-lib*))) +(string-join data "\n"))) + + + +;;====================================================================== +;; web pages +;;====================================================================== + +#;(define (http-transport:html-test-log $) + (let* ((run-id ($ 'runid)) + (test-item ($ 'testname)) + (parts (string-split test-item ":")) + (test-name (car parts)) + + (item-name (if (equal? 
(length parts) 1) + "" + (cadr parts)))) + ;(print $) + (tests:get-test-log run-id test-name item-name))) + + +#;(define (http-transport:html-dboard $) + (let* ((page ($ 'page)) + (oup (open-output-string)) + (bdy "--------------------------") + + (ret (tests:dynamic-dboard page))) + (s:output-new oup ret) + (close-output-port oup) + + (set! bdy (get-output-string oup)) + (conc "

Dashboard

" bdy "

" ))) + +(define (http-transport:main-page) + (let ((linkpath (root-path))) + (conc "

" (pathname-strip-directory *toppath*) "

" + "" + "Run area: " *toppath* + "

Server Stats

" + (http-transport:stats-table) + "
" + (http-transport:runs linkpath) + "
" + ;; (http-transport:run-stats) + "" + ))) + +(define (http-transport:stats-table) + (mutex-lock! *heartbeat-mutex*) + (let ((res + (conc "" + ;; "" + "" + "" + "" + ;; "" + "" + "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) + "n/a (no writes)" + (/ *writes-total-delay* + *number-of-writes*)) + " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + " ms
Last access" (seconds->time-string *db-last-access*) "
"))) + (mutex-unlock! *heartbeat-mutex*) + res)) + +(define (http-transport:runs linkpath) + (conc "

Runs

" + (string-intersperse + (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) + (map (lambda (p) + (conc "" p "
")) + files)) + " "))) + +#;(define (http-transport:run-stats) + (let ((stats (open-run-close db:get-running-stats #f))) + (conc "" + (string-intersperse + (map (lambda (stat) + (conc "")) + stats) + " ") + "
" (car stat) "" (cadr stat) "
"))) +) Index: launch-inc.scm ================================================================== --- launch-inc.scm +++ launch-inc.scm @@ -14,1699 +14,5 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . -;;====================================================================== -;; launch a task - this runs on the originating host, tests themselves -;; -;;====================================================================== - - -;;====================================================================== -;; ezsteps -;;====================================================================== - -;; ezsteps were going to be coded as -;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute -;; BUT -;; now are -;; stepname {VAR=first,second,third ...} command ... -;; where the {VAR=first,second,third ...} is optional. - -;; given an exit code and whether or not logpro was used calculate OK/BAD -;; return #t if we are ok, #f otherwise -(define (steprun-good? logpro exitcode) - (or (eq? exitcode 0) - (and logpro (eq? exitcode 2)))) - -;; if handed a string, process it, else look for MT_CMDINFO -(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) - (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) - (if enccmd - (common:read-encoded-string enccmd) - '()))) - -;; 0 1 2 3 -(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0)) - -;; return (conc status ": " comment) from the final section so that -;; the comment can be set in the step record in launch.scm -;; -(define (launch:load-logpro-dat run-id test-id stepname) - (let ((cname (conc stepname ".dat"))) - (if (common:file-exists? 
cname) - (let* ((dat (read-config cname #f #f)) - (csvr (db:logpro-dat->csv dat stepname)) - (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) - (fmt-csv (map list->csv-record csvr)))) - (status (configf:lookup dat "final" "exit-status")) - (msg (configf:lookup dat "final" "message"))) - (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro - (rmt:csv->test-data run-id test-id csvt) - (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) - ;; (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) - ;; ) - (cond - ((equal? status "PASS") "PASS") ;; skip the message part if status is pass - (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) - (else #f))) - #f))) - -(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm - (let* ((stepname (car ezstep)) ;; do stuff to run the step - (stepinfo (cadr ezstep)) - ;; (let ((info (cadr ezstep))) - ;; (if (proc? info) "" info))) - ;; (stepproc (let ((info (cadr ezstep))) - ;; (if (proc? info) info #f))) - (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) - (stepparams (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each - (paramparts (if (string? 
stepparams) - (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) - '())) - (subrun (alist-ref "subrun" paramparts equal?)) - (stepcmd (list-ref stepparts 3)) - (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ - (logpro-file (conc stepname ".logpro")) - (html-file (conc stepname ".html")) - (dat-file (conc stepname ".dat")) - (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) - (logpro-used (common:file-exists? logpro-file))) - - (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams - ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) - - (if (and tconfig-logpro - (not logpro-used)) ;; no logpro file found but have a defn in the testconfig - (begin - (with-output-to-file logpro-file - (lambda () - (print ";; logpro file extracted from testconfig\n" - ";;") - (print tconfig-logpro))) - (set! logpro-used #t))) - - ;; NB// can safely assume we are in test-area directory - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts - " stepparams: " stepparams " stepcmd: " stepcmd) - - ;; ;; first source the previous environment - ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") - ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) - ;; (if (and prevstep (common:file-exists? prev-env)) - ;; (set! script (conc script "source " prev-env)))) - - ;; call the command using mt_ezstep - ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) - - (debug:print 4 *default-log-port* "script: " script) - (rmt:teststep-set-status! 
run-id test-id stepname "start" "-" #f #f) - ;; now launch the actual process - (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) - (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") - (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 - (pid #f)) - (let ((proc (lambda () - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (if subrun - (begin - (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") - (common:without-vars proc "^MT_.*")) - (proc))) - - (with-output-to-file "Makefile.ezsteps" - (lambda () - (print stepname ".log :") - (print "\t" cmd) - (if (common:file-exists? (conc stepname ".logpro")) - (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) - (print) - (print stepname " : " stepname ".log") - (print)) - #:append) - - (rmt:test-set-top-process-pid run-id test-id pid) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1)))) - ))))) - (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - ;; now run logpro if needed - (if logpro-used - (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) - (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! 
m) - ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1))))) - (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) - - (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (logfna (if logpro-used (conc stepname ".html") "")) - (comment #f)) - (if logpro-used - (let ((datfile (conc stepname ".dat"))) - ;; load the .dat file into the test_data table if it exists - (if (common:file-exists? datfile) - (set! comment (launch:load-logpro-dat run-id test-id stepname))) - (rmt:test-set-log! run-id test-id (conc stepname ".html")))) - (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) - ;; set the test final status - (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (this-step-status (cond - ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings - ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check - ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived - ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort - ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip - ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass - (else 'fail))) - (overall-status (cond - ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3) - (else 'fail))) - (next-status (cond - ((eq? 
overall-status 'pass) this-step-status) - ((eq? overall-status 'warn) - (if (eq? this-step-status 'fail) 'fail 'warn)) - ((eq? overall-status 'abort) 'abort) - (else 'fail))) - (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? - (cond - ((null? tal) ;; more to run? - "COMPLETED") - (else "RUNNING")))) - (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used - " this-step-status: " this-step-status " overall-status: " overall-status - " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) - (case next-status - ((warn) - (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WARN" - (if (eq? this-step-status 'warn) "Logpro warning found" #f) - #f)) - ((check) - (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "CHECK" - (if (eq? this-step-status 'check) "Logpro check found" #f) - #f)) - ((waived) - (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WAIVED" - (if (eq? this-step-status 'check) "Logpro waived found" #f) - #f)) - ((abort) - (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "ABORT" - (if (eq? this-step-status 'abort) "Logpro abort found" #f) - #f)) - ((skip) - (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! 
does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "SKIP" - (if (eq? this-step-status 'skip) "Logpro skip found" #f) - #f)) - ((pass) - (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) - (else ;; 'fail - (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" - (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) - ))) - logpro-used)) - -(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) - ;; (let-values - ;; (((pid exit-status exit-code) - ;; (run-n-wait fullrunscript))) - ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) - ;; Since we should have a clean slate at this time there is no need to do - ;; any of the other stuff that tests:test-set-status! does. Let's just - ;; force RUNNING/n/a - - ;; (thread-sleep! 0.3) - ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - ;; if there is a runscript do it first - (if fullrunscript - (let ((pid (process-run fullrunscript))) - (rmt:test-set-top-process-pid run-id test-id pid) - (let loop ((i 0)) - (let-values - (((pid-val exit-status exit-code) (process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 
2) - (loop (+ i 1))) - ))))) - ;; then, if runscript ran ok (or did not get called) - ;; do all the ezsteps (if any) - (if (or ezsteps subrun) - (let* ((test-run-dir (tests:get-test-path-from-environment)) - (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? - ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic - ;; ezstep names need a full re-eval here. - (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) - (ezstepslst (if (hash-table? testconfig) - (hash-table-ref/default testconfig "ezsteps" '()) - #f))) - (if testconfig - (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... - (begin - (launch:setup) - (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " - (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) - ;; after all that, still no testconfig? Time to abort - (if (not testconfig) - (begin - (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") - (exit 1))) - - ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry - ;; 1. get section [runarun] - ;; 2. unset MT_* vars - ;; 3. fix target - ;; 4. fix runname - ;; 5. fix testpatt or calculate it from contour - ;; 6. launch the run - ;; 7. roll up the run result and or roll up the logpro processed result - (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested - (subrun:initialize-toprun-test testconfig test-run-dir) - (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) - (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") - (set! ezsteps #t) ;; set the needed flag - (set! 
ezstepslst - (append (or ezstepslst '()) - (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) - - ;; process the ezsteps - (if ezsteps - (begin - (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) - ;; if ezsteps was defined then we are sure to have at least one step but check anyway - (if (not (> (length ezstepslst) 0)) - (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") - (let loop ((ezstep (car ezstepslst)) - (tal (cdr ezstepslst)) - (prevstep #f)) - (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") - ;; check exit-info (vector-ref exit-info 1) - (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) - (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)) - (stepname (car ezstep))) - ;; if logpro-used read in the stepname.dat file - (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) - (launch:load-logpro-dat run-id test-id stepname)) - (if (steprun-good? logpro-used (launch:einf-exit-code exit-info)) - (if (not (null? tal)) - (loop (car tal) (cdr tal) stepname)) - (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. 
Stopping"))) - (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))))) - -(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) - (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) - (start-seconds (current-seconds)) - (calc-minutes (lambda () - (inexact->exact - (round - (- - (current-seconds) - start-seconds))))) - (kill-tries 0)) - ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) - ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) - (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) - - (let loop ((minutes (calc-minutes)) - (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (disk-free (get-df (current-directory))) - (last-sync (current-seconds))) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) - (let* ((over-time (> (current-seconds) (+ last-sync update-period))) - (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (delta (abs (- load cpu-load)))) - (if (> delta 0.1) ;; don't bother updating with small changes - load - #f))) - (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds - (get-df (current-directory)) - disk-free)) - (delta (abs (- df disk-free)))) - (if (and (> df 0) - (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg - df - #f))) - (do-sync (or new-cpu-load new-disk-free over-time)) - - (test-info (rmt:get-test-info-by-id run-id test-id)) - (state (db:test-get-state test-info)) - (status (db:test-get-status test-info)) - (kill-reason "no kill reason specified") - (kill-job? 
#f)) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) - (cond - ((test-get-kill-request run-id test-id) - (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") - (set! kill-job? #t)) - ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) - (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) - (set! kill-job? #t)) - ((equal? status "DEAD") - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") - ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING - (set! kill-job? #f))) - - (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) - (launch:handle-zombie-tests run-id) - (when do-sync - ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) - ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) - - (if kill-job? - (begin - (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) - (mutex-lock! m) - ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this - ;; section and the runit section? 
Or add a loop that tries three times with a 1/4 second - ;; between tries? - (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0)) - (pid2 (rmt:test-get-top-process-pid run-id test-id)) - (pids (delete-duplicates (filter number? (list pid1 pid2))))) - (if (not (null? pids)) - (begin - (for-each - (lambda (pid) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) - (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") - (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) - ;; (if (process:alive? pid) - ;; (begin - (map (lambda (pid-num) - (process-signal pid-num signal/term)) - (process:get-sub-pids pid)) - (thread-sleep! 5) - ;; (if (process:process-alive? pid) - (map (lambda (pid-num) - (handle-exceptions - exn - #f - (process-signal pid-num signal/kill))) - (process:get-sub-pids pid)))) - ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) - pids) - ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? - (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt - (begin - (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) - (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt - ))) - (mutex-unlock! m) - ;; no point in sticking around. Exit now. But run end of run before exiting? - (launch:end-of-run-check run-id) - (exit))) - (if (hash-table-ref/default misc-flags 'keep-going #f) - (begin - (thread-sleep! 
3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses - (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta - (loop (calc-minutes) - (or new-cpu-load cpu-load) - (or new-disk-free disk-free) - (if do-sync (current-seconds) last-sync))))))) - (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional - - -(define (launch:execute encoded-cmd) - (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) - (tconfigreg #f)) - (setenv "MT_CMDINFO" encoded-cmd) - ;;(bb-check-path msg: "launch:execute incoming") - (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) - ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) - (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area - (top-path (assoc/default 'toppath cmdinfo)) - (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area - (test-name (assoc/default 'test-name cmdinfo)) - (runscript (assoc/default 'runscript cmdinfo)) - (ezsteps (assoc/default 'ezsteps cmdinfo)) - (subrun (assoc/default 'subrun cmdinfo)) - ;; (runremote (assoc/default 'runremote cmdinfo)) - ;; (transport (assoc/default 'transport cmdinfo)) ;; not used - ;; (serverinf (assoc/default 'serverinf cmdinfo)) - ;; (port (assoc/default 'port cmdinfo)) - (serverurl (assoc/default 'serverurl cmdinfo)) - (homehost (assoc/default 'homehost cmdinfo)) - (run-id (assoc/default 'run-id cmdinfo)) - (test-id (assoc/default 'test-id cmdinfo)) - (target (assoc/default 'target cmdinfo)) - (areaname (assoc/default 'areaname cmdinfo)) - (itemdat (assoc/default 'itemdat cmdinfo)) - (env-ovrd (assoc/default 'env-ovrd cmdinfo)) - (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar - 
(runname (assoc/default 'runname cmdinfo)) - (megatest (assoc/default 'megatest cmdinfo)) - (runtlim (assoc/default 'runtlim cmdinfo)) - (contour (assoc/default 'contour cmdinfo)) - (item-path (item-list->path itemdat)) - (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) - (keys #f) - (keyvals #f) - (fullrunscript (if (not runscript) - #f - (if (substring-index "/" runscript) - runscript ;; use unadultered if contains slashes - (let ((fulln (conc work-area "/" runscript))) - (if (and (common:file-exists? fulln) - (file-execute-access? fulln)) - fulln - runscript))))) ;; assume it is on the path - (check-work-area (lambda () - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:directory-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) - (begin - (debug:print 0 *default-log-port* - "INFO: we are expecting to be in directory " work-area "\n" - " but we are actually in the directory " (current-directory) "\n" - " doing another change dir.") - (change-directory work-area))) - - ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. - (let ((files (glob (conc testpath "/*"))) - (bad-files '())) - (for-each - (lambda (fullname) - (let* ((fname (pathname-strip-directory fullname)) - (targn (conc work-area "/" fname))) - (if (not (file-exists? targn)) - (set! bad-files (cons fname bad-files))))) - files) - (if (not (null? bad-files)) - (begin - (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. 
Re-running the copy command.") - (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) - (launch:test-copy testpath work-area)))) - ;; one more time, change to the work-area directory - (change-directory work-area))) - ) ;; let* - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path - (let ((sighand (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (if (eq? signum signal/stop) - (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) - (set! *time-to-exit* #t) - (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") - (let ((th1 (make-thread (lambda () - (print "set test to COMPLETED/ABORT begin.") - (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") - (print "set test to COMPLETED/ABORT complete.") - (print "Killed by signal " signum ". Exiting") - (exit 1)))) - (th2 (make-thread (lambda () - (thread-sleep! 
20) - (debug:print 0 *default-log-port* "Done") - (exit 4))))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2))))) - (set-signal-handler! signal/int sighand) - (set-signal-handler! signal/term sighand) - ) ;; (set-signal-handler! signal/stop sighand) - - ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, - ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* - ;; - (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) - (test-host (if test-info - (db:test-get-host test-info) - (begin - (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") - (exit)))) - (test-pid (db:test-get-process_id test-info))) - (cond - ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. - ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun - (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") - ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") - - (rmt:general-call 'set-test-start-time #f test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - ) ;; prime it for running - ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) - (if (process:alive-on-host? test-host test-pid) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - (exit))) - ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) - ;; (tests:test-force-state-status! 
run-id test-id "REMOTEHOSTSTART" "n/a") - (rmt:general-call 'set-test-start-time #f test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - ) - (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") - (exit)))) - - ;; cleanup prior execution's steps - (rmt:delete-steps-for-test! run-id test-id) - - (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) - (set! keys (rmt:get-keys)) - ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process - ;; one of these is defunct/redundant ... - (if (not (launch:setup force-reread: #t)) - (begin - (debug:print 0 *default-log-port* "Failed to setup, exiting") - ;; (sqlite3:finalize! db) - ;; (sqlite3:finalize! tdb) - (exit 1))) - ;; validate that the test run area is available - (check-work-area) - - ;; still need to go back to run area home for next couple steps - (change-directory *toppath*) - - ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This - ;; seems non-ideal but could well break stuff - ;; BUG? BUG? BUG? - - (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) - (wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists - ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) - ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) - ;; Now have runconfigs data loaded, set environment vars - (for-each - (lambda (section) - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if (and (string? var)(string? 
val)) - (begin - (safe-setenv var (configf:eval-string-in-environment val))) ;; val) - (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) - (configf:get-section rconfig section))) - (list "default" target))) - ;;(bb-check-path msg: "launch:execute post block 1") - - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:file-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - ;; now we can switch to the work-area? - (change-directory work-area) - ;;(bb-check-path msg: "launch:execute post block 1.5") - ;; (change-directory work-area) - (set! keyvals (keys:target->keyval keys target)) - ;; apply pre-overrides before other variables. The pre-override vars must not - ;; clobbers things from the official sources such as megatest.config and runconfigs.config - (if (string? set-vars) - (let ((varpairs (string-split set-vars ","))) - (debug:print 4 *default-log-port* "varpairs: " varpairs) - (map (lambda (varpair) - (let ((varval (string-split varpair "="))) - (if (eq? (length varval) 2) - (let ((var (car varval)) - (val (cadr varval))) - (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") - (setenv var val))))) - varpairs))) - ;;(bb-check-path msg: "launch:execute post block 2") - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if val - (setenv var val) - (begin - (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. 
Exiting") - (exit))))) - (list - (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_ITEMPATH" item-path) - (list "MT_RUNNAME" runname) - (list "MT_MEGATEST" megatest) - (list "MT_TARGET" target) - (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) - ;;(bb-check-path msg: "launch:execute post block 3") - - (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) - ;;(bb-check-path msg: "launch:execute post block 4") - ;; (change-directory top-path) - ;; Can setup as client for server mode now - ;; (client:setup) - - - ;; environment overrides are done *before* the remaining critical envars. - (alist->env-vars env-ovrd) - ;;(bb-check-path msg: "launch:execute post block 41") - (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) - ;;(bb-check-path msg: "launch:execute post block 42") - (set-item-env-vars itemdat) - ;;(bb-check-path msg: "launch:execute post block 43") - (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) - (if blacklist - (let ((vars (string-split blacklist))) - (save-environment-as-files "megatest" ignorevars: vars) - (for-each (lambda (var) - (unsetenv var)) - vars)) - (save-environment-as-files "megatest"))) - ;;(bb-check-path msg: "launch:execute post block 44") - ;; open-run-close not needed for test-set-meta-info - ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) - ;; (tests:set-full-meta-info test-id run-id 0 work-area) - (tests:set-full-meta-info #f test-id run-id 0 work-area 10) - - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - (if (args:get-arg "-xterm") - (set! fullrunscript "xterm") - (if (and fullrunscript - (common:file-exists? fullrunscript) - (not (file-execute-access? 
fullrunscript))) - (system (conc "chmod ug+x " fullrunscript)))) - - ;; We are about to actually kick off the test - ;; so this is a good place to remove the records for - ;; any previous runs - ;; (db:test-remove-steps db run-id testname itemdat) - ;; now is also a good time to write the .testconfig file - (let* ((tconfig-fname (conc work-area "/.testconfig")) - (tconfig-tmpfile (conc tconfig-fname ".tmp")) - (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) - (configf:write-alist tconfig tconfig-tmpfile) - (file-move tconfig-tmpfile tconfig-fname #t)) - ;; - (let* ((m (make-mutex)) - (kill-job? #f) - (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status - (job-thread #f) - ;; (keep-going #t) - (misc-flags (let ((ht (make-hash-table))) - (hash-table-set! ht 'keep-going #t) - ht)) - (runit (lambda () - (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) - (monitorjob (lambda () - (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) - (th1 (make-thread monitorjob "monitor job")) - (th2 (make-thread runit "run job"))) - (set! job-thread th2) - (thread-start! th1) - (thread-start! th2) - (thread-join! th2) - (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") - (hash-table-set! misc-flags 'keep-going #f) - (thread-join! th1) - (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. - (mutex-lock! m) - (let* ((item-path (item-list->path itemdat)) - ;; only state and status needed - use lazy routine - (testinfo (rmt:get-testinfo-state-status run-id test-id))) - ;; Am I completed? 
- (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) - (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status - ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test - ) - (new-status (cond - ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) - ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) - ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) - ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") - ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") - ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") - ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") - (else "FAIL")))) ;; (db:test-get-status testinfo))) - (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) - (tests:test-set-status! 
run-id - test-id - new-state - new-status - (args:get-arg "-m") #f) - ;; need to update the top test record if PASS or FAIL and this is a subtest - ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! - )) - ;; for automated creation of the rollup html file this is a good place... - (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) - (tests:summarize-test run-id test-id) ;; don't force - just update if no - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) - (mutex-unlock! m) - (launch:end-of-run-check run-id ) - (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " - work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") - (if (not (launch:einf-exit-status exit-info)) - (exit 4)))) - ))) - -;; Spec for End of test -;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup -;; At transition to run COMPLETED/X do hooks -;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND -;; we can prove the process is not alive (ssh host pstree -A pid) -;; if dead safe to mark the test as killed in the db -;; State/status table -;; new -;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) 
==> COMPLETED / X where X is same as itemized rollup -;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na -;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED -;; 0 RUNNING ==> this is actually the first condition, should not get here - -(define (launch:end-of-run-check run-id ) - (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) - (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) - (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) - (current-state (rmt:get-run-state run-id)) - (current-status (rmt:get-run-status run-id))) - ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing - (debug:print 0 *default-log-port* "rollup run state/status") - (rmt:set-state-status-and-roll-up-run run-id current-state current-status) - - (cond - ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) - (debug:print 0 *default-log-port* "look for post hook.") - (runs:run-post-hook run-id)) - ((> running-cnt 3) - (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) - ((> running-cnt 0) - (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) - (let ((kill-cnt (launch:kill-tests-if-dead run-id))) - (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt)) - (launch:end-of-run-check run-id)))) ;;todo - (else (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. 
Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt) - (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (if (> (length not-completed-tests) 0) - (let loop ((running-test (car not-completed-tests)) - (tal (cdr not-completed-tests))) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11))) - (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") - (if (not (null? tal)) - (loop (car tal) (cdr tal))))))))))) - -(define (launch:is-test-alive host pid) -(if (and host pid (not (equal? host "n/a"))) -(let* ((cmd (conc "ssh " host " pstree -A " pid)) - (output (with-input-from-pipe cmd read-lines))) - (print "cmd: " cmd "\n op: " output ) - (if(eq? (length output) 0) - #f - #t)) -#t)) - -(define (launch:kill-tests-if-dead run-id) - (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (let loop ((running-test (car running-tests)) - (tal (cdr running-tests)) - (kill-cnt 0)) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11)) - (test-id (vector-ref running-test 0)) - (host (vector-ref running-test 6)) - (pid (rmt:test-get-top-process-pid run-id test-id)) - (event-time (vector-ref running-test 5)) - (duration (vector-ref running-test 12)) - (flag 0) - (curr-time (current-seconds))) - (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed - (begin - (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") - (set! flag 1) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) - (if (not (null? 
tal)) - (loop (car tal) (cdr tal) (+ kill-cnt flag)) - (+ kill-cnt flag)))))) - -;; DO NOT USE - caching of configs is handled in launch:setup now. -;; -(define (launch:cache-config) - ;; if we have a linktree and -runtests and -target and the directory exists dump the config - ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg - (if (and *configdat* - (or (args:get-arg "-run") - (args:get-arg "-runtests") - (args:get-arg "-execute"))) - (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE")) - (target (common:args-get-target exit-if-bad: #t)) - (runname (or (args:get-arg "-runname") - (args:get-arg ":runname") - (getenv "MT_RUNNAME"))) - (fulldir (conc linktree "/" - target "/" - runname))) - (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree - (begin - (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) - (if (not (common:file-exists? fulldir)) - (create-directory fulldir #t)) ;; need to protect with exception handler - (if (and target - runname - (common:file-exists? fulldir)) - (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) - (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) - (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) - (if (common:file-exists? 
rconfig) ;; only cache megatest.config AFTER runconfigs has been cached - (begin - (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) - (if (not (common:in-running-test?)) - (configf:write-alist *configdat* tmpfile)) - (system (conc "ln -sf " tmpfile " " targfile)))) - ))) - (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) - - -;; gather available information, if legit read configs in this order: -;; -;; if have cache; -;; read it a return it -;; else -;; megatest.config (do not cache) -;; runconfigs.config (cache if all vars avail) -;; megatest.config (cache if all vars avail) -;; returns: -;; *toppath* -;; side effects: -;; sets; *configdat* (megatest.config info) -;; *runconfigdat* (runconfigs.config info) -;; *configstatus* (status of the read data) -;; -(define (launch:setup #!key (force-reread #f) (areapath #f)) - (mutex-lock! *launch-setup-mutex*) - (if (and *toppath* - (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all - (begin - (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") - (mutex-unlock! *launch-setup-mutex*) - *toppath*) - (let ((res (launch:setup-body force-reread: force-reread areapath: areapath))) - (mutex-unlock! *launch-setup-mutex*) - res))) - -;; return paths depending on what info is available. -;; -(define (launch:get-cache-file-paths areapath toppath target mtconfig) - (let* ((use-cache (common:use-cache?)) - (runname (common:args-get-runname)) - (linktree (common:get-linktree)) - (testname (common:get-full-test-name)) - (rundir (if (and runname target linktree) - (common:directory-writable? (conc linktree "/" target "/" runname)) - #f)) - (testdir (if (and rundir testname) - (common:directory-writable? 
(conc rundir "/" testname)) - #f)) - (cachedir (or testdir rundir)) - (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))) - (debug:print-info 6 *default-log-port* - "runname=" runname - "\n linktree=" linktree - "\n testname=" testname - "\n rundir=" rundir - "\n testdir=" testdir - "\n cachedir=" cachedir - "\n mtcachef=" mtcachef - "\n rccachef=" rccachef) - (cons mtcachef rccachef))) - -(define (launch:setup-body #!key (force-reread #f) (areapath #f)) - (if (and (eq? *configstatus* 'fulldata) - *toppath* - (not force-reread)) ;; no need to reprocess - *toppath* ;; return toppath - (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. - (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath - (target (common:args-get-target)) - (sections (if target (list "default" target) #f)) ;; for runconfigs - (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... - (mtcachef (if (null? cachefiles) - #f - (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (if (null? cachefiles) - #f - (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) - ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) - (set! 
*toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource - ;;(BB> "launch:setup-body -- cachefiles="cachefiles) - (cond - ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME - ((and (not force-reread) - mtcachef rccachef - use-cache - (get-environment-variable "MT_RUN_AREA_HOME") - (common:file-exists? mtcachef) - (common:file-exists? rccachef)) - ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") - (set! *configdat* (configf:read-alist mtcachef)) - ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) - (set! *runconfigdat* (configf:read-alist rccachef)) - (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) - (set! *configstatus* 'fulldata) - (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) - *toppath*) - ;; there are no existing cached configs, do full reads of the configs and cache them - ;; we have all the info needed to fully process runconfigs and megatest.config - ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? - mtcachef - rccachef) ;; BB- why are we doing this without asking if caching is desired? - ;;(BB> "launch:setup-body -- cond branch 2") - (let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (first-rundat (let ((toppath (if toppath - toppath - (car first-pass)))) - (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. - (conc (if (string? toppath) - toppath - (get-environment-variable "MT_RUN_AREA_HOME")) - "/runconfigs.config") - *runconfigdat* #t - sections: sections)))) - (set! *runconfigdat* first-rundat) - (if first-pass ;; - (begin - ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") - (set! 
*configdat* (car first-pass)) - ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) - (set! *configinfo* first-pass) - (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it - (set! toppath *toppath*) - (if (not *toppath*) - (begin - (debug:print-error 0 *default-log-port* "you are not in a megatest area!") - (exit 1))) - (setenv "MT_RUN_AREA_HOME" *toppath*) - ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it - (let* ((keys (rmt:get-keys)) - (key-vals (keys:target->keyval keys target)) - (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) - ; (if *configdat* - ; (configf:lookup *configdat* "setup" "linktree") - ; (conc *toppath* "/lt")))) - (second-pass (find-and-read-config - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config - (for-each (lambda (kt) - (setenv (car kt) (cadr kt))) - key-vals) - (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... - sections: sections))) - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. 
- - (if rccachef - (common:fail-safe - (lambda () - (configf:write-alist runconfigdat rccachef)) - (conc "Could not write cache file - "rccachef))) - (if mtcachef - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef))) - (set! *runconfigdat* runconfigdat) - (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) - ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table - (set! *configdat* (make-hash-table)) - ))) - - ;; else read what you can and set the flag accordingly - ;; here we don't have either mtconfig or rccachef - (else - ;;(BB> "launch:setup-body -- cond branch 3 - else") - (let* ((cfgdat (find-and-read-config - (or (args:get-arg "-config") "megatest.config") - environ-patt: "env-override" - given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") - pathenvvar: "MT_RUN_AREA_HOME"))) - - (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) - (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) - (rdat (read-config (conc toppath ;; convert this to use runconfig:read! - "/runconfigs.config") *runconfigdat* #t sections: sections))) - (set! *configinfo* cfgdat) - (set! *configdat* (car cfgdat)) - (set! *runconfigdat* rdat) - (set! *toppath* toppath) - (set! *configstatus* 'partial)) - (begin - (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") - (exit 2)))))) - ;; COND ends here. - - ;; additional house keeping - (let* ((linktree (or (common:get-linktree) - (conc *toppath* "/lt")))) - (if linktree - (begin - (if (not (common:file-exists? 
linktree)) - (begin - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (exit 1)) - (create-directory linktree #t)))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) - (let ((tlink (conc *toppath* "/lt"))) - (if (not (common:file-exists? tlink)) - (create-symbolic-link linktree tlink))))) - (begin - (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") - ))) - (if (and *toppath* - (directory-exists? *toppath*)) - (begin - (setenv "MT_RUN_AREA_HOME" *toppath*) - (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) - (begin - (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") - (set! *toppath* #f) ;; force it to be false so we return #f - #f)) - - ;; one more attempt to cache the configs for future reading - (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. - (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) - (common:fail-safe - (lambda () - (configf:write-alist *runconfigdat* rccachef)) - (conc "Could not write cache file - "rccachef)) - ) - (if (and mtcachef *configdat* (not (common:file-exists? 
mtcachef))) - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef)) - ) - (if (and rccachef mtcachef *runconfigdat* *configdat*) - (set! *configstatus* 'fulldata))) - - ;; if have -append-config then read and append here - (let ((cfname (args:get-arg "-append-config"))) - (if (and cfname - (file-read-access? cfname)) - (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. - *toppath*))) - -(define (get-best-disk confdat testconfig) - (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) - (hash-table-ref/default confdat "disks" #f))) - (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) - (string->number (or m "10000"))))) - (if disks - (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb - (if res - (cdr res) - (begin -;; (if (common:low-noise-print 20 "No valid disks or no disk with enough space") -;; (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace)) - ;;(exit 1) - (if (null? disks) - (cons 1 (conc *toppath* "/runs")) - (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y))))))) - (let loop ((head (car paths)) (tail (cdr paths))) - (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t)))) - (if result - result - (if (null? tail) - (cons 1 (conc *toppath* "/runs")) - (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path. 
- - -(define (launch:test-copy test-src-path test-path) - (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd"))) - (if cmd - ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH - (string-substitute "TEST_TARG_PATH" test-path - (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) - #f))) - (cmd (if ovrcmd - ovrcmd - (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" - " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) - (status (system cmd))) - (if (not (eq? status 0)) - (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))) - - -;; Desired directory structure: -;; -;; - - -. -;; | -;; v -;; - - -|- -;; -;; dir stored in test is: -;; -;; - - [ - ] -;; -;; All log file links should be stored relative to the top of link path -;; -;; - [ - ] -;; -(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2)) - (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it - (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. - run-info - (db:get-value-by-header (db:get-rows run-info) - (db:get-header run-info) - "runname"))) - (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")) - ;; convert back to db: from rdb: - this is always run at server end - (target (string-intersperse (map cadr keyvals) "/")) - - (not-iterated (equal? 
"" item-path)) - - ;; all tests are found at /test-base or /test-base - (testtop-base (conc target "/" runname "/" testname)) - (test-base (conc testtop-base (if not-iterated "" "/") item-path)) - - ;; nb// if itempath is not "" then it is prefixed with "/" - (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base)) - (test-path (conc disk-path (if contour (conc "/" contour) "") "/" test-base)) - - ;; ensure this exists first as links to subtests must be created there - (linktree (common:get-linktree)) - ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree"))) - ;; (if rd rd (conc *toppath* "/runs")))) - ;; which seems wrong ... - - (lnkbase (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname)) - (lnkpath (conc lnkbase "/" testname)) - (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) - (lnktarget (conc lnkpath "/" item-path))) - - ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical - ;; rundir shortdir - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id) - - (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) - (if (not (common:file-exists? linktree)) - (begin - (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree) - (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) - ;; create the directory for the tests dir links, this is needed no matter what... try up to three times - (let loop ((done 3)) - (let ((success (if (and (not (common:directory-exists? lnkbase)) - (not (common:file-exists? 
lnkbase))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase) - (print-error-message exn (current-error-port)) - #t) - (create-directory lnkbase #t) - #f)))) - (if (and (not success)(> done 0)) - (loop (- done 1))))) - - ;; update the toptest record with its location rundir, cache the path - ;; This wass highly inefficient, one db write for every subtest, potentially - ;; thousands of unnecessary updates, cache the fact it was set and don't set it - ;; again. - - ;; Now create the link from the test path to the link tree, however - ;; if the test is iterated it is necessary to create the parent path - ;; to the iteration. use pathname-directory to trim the path by one - ;; level - (if (not not-iterated) ;; i.e. iterated - (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) - (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-directory iterated-parent #t)))) - - (if (symbolic-link? lnkpath) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (delete-file lnkpath))) - - (if (not (or (common:file-exists? lnkpath) - (symbolic-link? lnkpath))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-symbolic-link toptest-path lnkpath))) - - ;; NB - This was not working right - some top tests are not getting the path set!!! - ;; - ;; Do the setting of this record after the paths are created so that the shortdir can - ;; be set to the real directory location. 
This is safer for future clean up if the link - ;; tree is damaged or lost. - ;; - (if (not (hash-table-ref/default *toptest-paths* testname #f)) - (let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path)) - (curr-test-path (if testinfo ;; (filedb:get-path *fdb* - ;; (db:get-path dbstruct - ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir testinfo) ;; ) ;; ) - #f))) - (hash-table-set! *toptest-paths* testname curr-test-path) - ;; NB// Was this for the test or for the parent in an iterated test? - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath - (if (common:file-exists? lnkpath) - ;; (resolve-pathname lnkpath) - (common:nice-path lnkpath) - lnkpath) - testname "" run-id) - ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) - (if (or (not curr-test-path) - (not (directory-exists? toptest-path))) - (begin - (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) - (handle-exceptions - exn - #f ;; don't care to catch and deal with errors here for now. - (create-directory toptest-path #t)) - (hash-table-set! *toptest-paths* testname toptest-path))))) - - ;; The toptest path has been created, the link to the test in the linktree has - ;; been created. Now, if this is an iterated test the real test dir must be created - (if (not not-iterated) ;; this is an iterated test - (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) - (debug:print 2 *default-log-port* "Setting up sub test run area") - (debug:print 2 *default-log-port* " - creating run area in " test-path) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-directory test-path #t)) - (debug:print 2 *default-log-port* - " - creating link from: " test-path "\n" - " to: " lnktarget) - - ;; If there is already a symlink delete it and recreate it. 
- (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit)) - (if (symbolic-link? lnktarget) (delete-file lnktarget)) - (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) - - (if (not (directory? test-path)) - (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes - - (if (and test-src-path (directory? test-path)) - (begin - (launch:test-copy test-src-path test-path) - (list lnkpathf lnkpath )) - (if (and test-src-path (> remtries 0)) - (begin - (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) - ;; - (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) - (list #f #f))))) - - -(define (launch:handle-zombie-tests run-id) - (let* ((key (conc "zombiescan-runid-"run-id)) - (now (current-seconds)) - (threshold (- (current-seconds) (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120)))) - (val (rmt:get-var key)) - (do-scan? - (cond - ((not val) - #t) - ((< val threshold) - #t) - (else #f)))) - (when do-scan? - (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") - (rmt:set-var key (current-seconds)) - (rmt:find-and-mark-incomplete run-id #f)))) - - - - - -;; 1. look though disks list for disk with most space -;; 2. create run dir on disk, path name is meaningful -;; 3. create link from run dir to megatest runs area -;; 4. remotely run the test on allocated host -;; - could be ssh to host from hosts table (update regularly with load) -;; - could be netbatch -;; (launch-test db (cadr status) test-conf)) -(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) - (mutex-lock! 
*launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ( ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) - (item-path (item-list->path itemdat)) - (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) - (let loop ((delta (- (current-seconds) *last-launch*)) - (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) - (if (> launch-delay delta) - (begin - (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. - (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) - (thread-sleep! 
(- launch-delay delta)) - (loop (- (current-seconds) *last-launch*) launch-delay)))) - (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) - (append - (list - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - (list "MT_RUNNAME" runname) - (list "MT_ITEMPATH" item-path) - (list "MT_CONTOUR" contour) - ) - itemdat)) - (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed - ;; for tconfig, why do we allow fallback to test-conf? - (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) - (begin - (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") - test-conf))) ;; force re-read now that all vars are set - (useshell (let ((ush (configf:lookup *configdat* "jobtools" "useshell"))) - (if ush - (if (equal? ush "no") ;; must use "no" to NOT use shell - #f - ush) - #t))) ;; default is yes - (runscript (configf:lookup tconfig "setup" "runscript")) - (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag - (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun - ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) - ;; (memory (configf:lookup tconfig "requirements" "memory")) - ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed - (remote-megatest (configf:lookup *configdat* "setup" "executable")) - (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") - (configf:lookup *configdat* "setup" "runtimelim"))) - ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to - ;; allow running from dashboard. 
Extract the path - ;; from the called megatest and convert dashboard - ;; or dboard to megatest - (local-megatest (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) "../megatest") - ((mtest) "../megatest") - ((dashboard) "megatest") - (else exe))))) - (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path - (work-area #f) - (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all - (diskpath #f) - (cmdparms #f) - (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) - (mt-bindir-path #f) - (testinfo (rmt:get-test-info-by-id run-id test-id)) - (mt_target (string-intersperse (map cadr keyvals) "/")) - (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) - (if (args:get-arg "-logging")(list "-logging") '())))) - ;; (if hosts (set! hosts (string-split hosts))) - ;; set the megatest to be called on the remote host - (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) - (set! mt-bindir-path (pathname-directory remote-megatest)) - (if launcher (set! 
launcher (string-split launcher))) - ;; set up the run work area for this test - (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run - (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir - (begin - (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) - (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record - - ;; prevent overlapping actions - set to LAUNCHED as early as possible - ;; - ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail - (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) - ;; (pp (hash-table->alist tconfig)) - (set! diskpath (get-best-disk *configdat* tconfig)) - (if diskpath - (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) - (set! work-area (car dat)) - (set! toptest-work-area (cadr dat)) - (debug:print-info 2 *default-log-port* "Using work area " work-area)) - (begin - (set! work-area (conc test-path "/tmp_run")) - (create-directory work-area #t) - (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) - (set! 
cmdparms (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda () ;; (list 'hosts hosts) - (write (list (list 'testpath test-path) - ;; (list 'transport (conc *transport-type*)) - ;; (list 'serverinf *server-info*) - (list 'homehost (let* ((hhdat (common:get-homehost))) - (if hhdat - (car hhdat) - #f))) - (list 'serverurl (if *runremote* - (remote-server-url *runremote*) - #f)) ;; - (list 'areaname (common:get-testsuite-name)) - (list 'toppath *toppath*) - (list 'work-area work-area) - (list 'test-name test-name) - (list 'runscript runscript) - (list 'run-id run-id ) - (list 'test-id test-id ) - ;; (list 'item-path item-path ) - (list 'itemdat itemdat ) - (list 'megatest remote-megatest) - (list 'ezsteps ezsteps) - (list 'subrun subrun) - (list 'target mt_target) - (list 'contour contour) - (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) - (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) - (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) - (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path)))))))) - - ;; clean out step records from previous run if they exist - ;; (rmt:delete-test-step-records run-id test-id) - ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway - (if (common:file-exists? work-area) - (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir - (cond - ;; ((and launcher hosts) ;; must be using ssh hostname - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) - (launcher - (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! 
fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) - (else - (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) - (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) - (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) - (debug:print 1 *default-log-port* "Launching " work-area) - ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done - (debug:print 4 *default-log-port* "fullcmd: " fullcmd) - (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path) - ) - itemdat))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) - ;; Launchwait defaults to true, must override it to turn off wait - (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. 
- process:cmd-run-with-stderr-and-exitcode->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd)))) - (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) - (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) - (if (not success) - (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. - ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test - (if (not launchwait) ;; give the OS a little time to allow the process to start - (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" - (lambda () - (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) - (if (list? launch-results) - (apply print launch-results) - (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) - #:append)) - (debug:print 2 *default-log-port* "Launching completed, updating db") - (debug:print 2 *default-log-port* "Launch results: " launch-results) - (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? 
- (process-signal (current-process-id) signal/kill) - )) - (alist->env-vars miscprevvals) - (alist->env-vars testprevvals) - (alist->env-vars commonprevvals) - launch-results)) - (change-directory *toppath*))) - -;; recover a test where the top controlling mtest may have died -;; -(define (launch:recover-test run-id test-id) - ;; this function is called on the test run host via ssh - ;; - ;; 1. look at the process from pid - ;; - is it owned by calling user - ;; - it it's run directory correct for the test - ;; - is there a controlling mtest (maybe stuck) - ;; 2. if recovery is needed watch pid - ;; - when it exits take the exit code and do the needful - ;; - (let* ((pid (rmt:test-get-top-process-pid run-id test-id)) - (psres (with-input-from-pipe - (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'") - (lambda () - (read-line)))) - (rundir (if (string? psres) ;; real process owned by user - (read-symbolic-link (conc "/proc/" pid "/cwd")) - #f))) - ;; now wait on that process if all is correct - ;; periodically update the db with runtime - ;; when the process exits look at the db, if still RUNNING after 10 seconds set - ;; state/status appropriately - (process-wait pid))) Index: launchmod.scm ================================================================== --- launchmod.scm +++ launchmod.scm @@ -18,18 +18,1829 @@ ;;====================================================================== (declare (unit launchmod)) (declare (uses commonmod)) +(declare (uses rmtmod)) +(declare (uses mtconfigf)) +(declare (uses testsmod)) +(declare (uses mtargs)) (module launchmod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(import scheme chicken data-structures extras files) +(import (prefix sqlite3 sqlite3:) posix typed-records + srfi-18 srfi-69 format ports srfi-1 matchable + z3 (prefix base64 base64:) regex + 
call-with-environment-variables csv) (import commonmod) +(import rmtmod) +(import (prefix mtconfigf configf:)) +(import testsmod) +(import (prefix mtargs args:)) + ;; (use (prefix ulex ulex:)) (include "common_records.scm") +(include "db_records.scm") +(include "run_records.scm") + +;;====================================================================== +;; launch a task - this runs on the originating host, tests themselves +;; +;;====================================================================== + + +;;====================================================================== +;; ezsteps +;;====================================================================== + +;; ezsteps were going to be coded as +;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute +;; BUT +;; now are +;; stepname {VAR=first,second,third ...} command ... +;; where the {VAR=first,second,third ...} is optional. + +;; given an exit code and whether or not logpro was used calculate OK/BAD +;; return #t if we are ok, #f otherwise +(define (steprun-good? logpro exitcode) + (or (eq? exitcode 0) + (and logpro (eq? exitcode 2)))) + +;; if handed a string, process it, else look for MT_CMDINFO +(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) + (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) + (if enccmd + (common:read-encoded-string enccmd) + '()))) + +;; 0 1 2 3 +(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0)) + +;; return (conc status ": " comment) from the final section so that +;; the comment can be set in the step record in launch.scm +;; +(define (launch:load-logpro-dat run-id test-id stepname) + (let ((cname (conc stepname ".dat"))) + (if (common:file-exists? 
cname) + (let* ((dat (configf:read-config cname #f #f)) + (csvr (db:logpro-dat->csv dat stepname)) + (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) + (fmt-csv (map list->csv-record csvr)))) + (status (configf:lookup dat "final" "exit-status")) + (msg (configf:lookup dat "final" "message"))) + (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro + (rmt:csv->test-data run-id test-id csvt) + (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) + ;; (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) + ;; ) + (cond + ((equal? status "PASS") "PASS") ;; skip the message part if status is pass + (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) + (else #f))) + #f))) + +(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm + (let* ((stepname (car ezstep)) ;; do stuff to run the step + (stepinfo (cadr ezstep)) + ;; (let ((info (cadr ezstep))) + ;; (if (proc? info) "" info))) + ;; (stepproc (let ((info (cadr ezstep))) + ;; (if (proc? info) info #f))) + (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparams (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (paramparts (if (string? 
stepparams) + (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) + '())) + (subrun (alist-ref "subrun" paramparts equal?)) + (stepcmd (list-ref stepparts 3)) + (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ + (logpro-file (conc stepname ".logpro")) + (html-file (conc stepname ".html")) + (dat-file (conc stepname ".dat")) + (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) + (logpro-used (common:file-exists? logpro-file))) + + (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams + ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) + + (if (and tconfig-logpro + (not logpro-used)) ;; no logpro file found but have a defn in the testconfig + (begin + (with-output-to-file logpro-file + (lambda () + (print ";; logpro file extracted from testconfig\n" + ";;") + (print tconfig-logpro))) + (set! logpro-used #t))) + + ;; NB// can safely assume we are in test-area directory + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparams: " stepparams " stepcmd: " stepcmd) + + ;; ;; first source the previous environment + ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") + ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) + ;; (if (and prevstep (common:file-exists? prev-env)) + ;; (set! script (conc script "source " prev-env)))) + + ;; call the command using mt_ezstep + ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) + + (debug:print 4 *default-log-port* "script: " script) + (rmt:teststep-set-status! 
run-id test-id stepname "start" "-" #f #f) + ;; now launch the actual process + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") + (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (pid #f)) + (let ((proc (lambda () + (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) + (if subrun + (begin + (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") + (common:without-vars proc "^MT_.*")) + (proc))) + + (with-output-to-file "Makefile.ezsteps" + (lambda () + (print stepname ".log :") + (print "\t" cmd) + (if (common:file-exists? (conc stepname ".logpro")) + (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) + (print) + (print stepname " : " stepname ".log") + (print)) + #:append) + + (rmt:test-set-top-process-pid run-id test-id pid) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1)))) + ))))) + (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + ;; now run logpro if needed + (if logpro-used + (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) + (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! 
m) + ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))))) + (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) + + (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") "")) + (comment #f)) + (if logpro-used + (let ((datfile (conc stepname ".dat"))) + ;; load the .dat file into the test_data table if it exists + (if (common:file-exists? datfile) + (set! comment (launch:load-logpro-dat run-id test-id stepname))) + (rmt:test-set-log! run-id test-id (conc stepname ".html")))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) + ;; set the test final status + (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (this-step-status (cond + ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings + ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check + ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived + ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort + ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip + ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass + (else 'fail))) + (overall-status (cond + ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3) + (else 'fail))) + (next-status (cond + ((eq? 
overall-status 'pass) this-step-status) + ((eq? overall-status 'warn) + (if (eq? this-step-status 'fail) 'fail 'warn)) + ((eq? overall-status 'abort) 'abort) + (else 'fail))) + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? + (cond + ((null? tal) ;; more to run? + "COMPLETED") + (else "RUNNING")))) + (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status + " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) + (case next-status + ((warn) + (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WARN" + (if (eq? this-step-status 'warn) "Logpro warning found" #f) + #f)) + ((check) + (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "CHECK" + (if (eq? this-step-status 'check) "Logpro check found" #f) + #f)) + ((waived) + (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WAIVED" + (if (eq? this-step-status 'check) "Logpro waived found" #f) + #f)) + ((abort) + (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "ABORT" + (if (eq? this-step-status 'abort) "Logpro abort found" #f) + #f)) + ((skip) + (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! 
does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "SKIP" + (if (eq? this-step-status 'skip) "Logpro skip found" #f) + #f)) + ((pass) + (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) + (else ;; 'fail + (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" + (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) + ))) + logpro-used)) + +(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) + ;; Since we should have a clean slate at this time there is no need to do + ;; any of the other stuff that tests:test-set-status! does. Let's just + ;; force RUNNING/n/a + + ;; (thread-sleep! 0.3) + ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + ;; if there is a runscript do it first + (if fullrunscript + (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 
2) + (loop (+ i 1))) + ))))) + ;; then, if runscript ran ok (or did not get called) + ;; do all the ezsteps (if any) + (if (or ezsteps subrun) + (let* ((test-run-dir (tests:get-test-path-from-environment)) + (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? + ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic + ;; ezstep names need a full re-eval here. + (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) + (ezstepslst (if (hash-table? testconfig) + (hash-table-ref/default testconfig "ezsteps" '()) + #f))) + (if testconfig + (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... + (begin + (launch:setup) + (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " + (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) + ;; after all that, still no testconfig? Time to abort + (if (not testconfig) + (begin + (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") + (exit 1))) + + ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry + ;; 1. get section [runarun] + ;; 2. unset MT_* vars + ;; 3. fix target + ;; 4. fix runname + ;; 5. fix testpatt or calculate it from contour + ;; 6. launch the run + ;; 7. roll up the run result and or roll up the logpro processed result + (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested + (subrun:initialize-toprun-test testconfig test-run-dir) + (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) + (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") + (set! ezsteps #t) ;; set the needed flag + (set! 
ezstepslst + (append (or ezstepslst '()) + (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) + + ;; process the ezsteps + (if ezsteps + (begin + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + (if (not (> (length ezstepslst) 0)) + (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (prevstep #f)) + (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") + ;; check exit-info (vector-ref exit-info 1) + (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) + (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)) + (stepname (car ezstep))) + ;; if logpro-used read in the stepname.dat file + (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) + (launch:load-logpro-dat run-id test-id stepname)) + (if (steprun-good? logpro-used (launch:einf-exit-code exit-info)) + (if (not (null? tal)) + (loop (car tal) (cdr tal) stepname)) + (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. 
Stopping"))) + (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))))) + +;; launch:monitor-job - polling loop run in its own thread (see launch:execute) while the test steps execute. +;; Each pass: reads the test record via rmt:get-test-info-by-id, decides whether to kill (kill request seen or runtlim exceeded), +;; flips a DEAD status back to RUNNING, calls launch:handle-zombie-tests, and pushes cpu/disk/run-time data with +;; tests:update-central-meta-info when do-sync is set. On kill it signals the pids (term, 5s pause, kill), marks the test KILLED and exits. +;; Loops (3s sleep) while misc-flags 'keep-going is true; exit-info and mutex m are shared with the step-running thread. +;; NOTE(review): calc-minutes returns elapsed *seconds* (there is no division by 60) - confirm consumers expect seconds. +(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) + (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) + (start-seconds (current-seconds)) + (calc-minutes (lambda () + (inexact->exact + (round + (- + (current-seconds) + start-seconds))))) + (kill-tries 0)) + ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) + + (let loop ((minutes (calc-minutes)) + (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) + (disk-free (get-df (current-directory))) + (last-sync (current-seconds))) + #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) + (let* ((over-time (> (current-seconds) (+ last-sync update-period))) + (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) + (delta (abs (- load cpu-load)))) + (if (> delta 0.1) ;; don't bother updating with small changes + load + #f))) + (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds + (get-df (current-directory)) + disk-free)) + (delta (abs (- df disk-free)))) + (if (and (> df 0) + (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg + df + #f))) + (do-sync (or new-cpu-load new-disk-free over-time)) + + (test-info (rmt:get-test-info-by-id run-id test-id)) + (state (db:test-get-state test-info)) + (status (db:test-get-status test-info)) + (kill-reason "no kill reason specified") + (kill-job? 
#f)) + #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) + (cond + ((test-get-kill-request run-id test-id) + (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") + (set! kill-job? #t)) + ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) + (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) + (set! kill-job? #t)) + ((equal? status "DEAD") + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") + ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING + (set! kill-job? #f))) + + (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) + (launch:handle-zombie-tests run-id) + (when do-sync + ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) + ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) + #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) + + (if kill-job? + (begin + (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) + (mutex-lock! m) + ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this + ;; section and the runit section? 
Or add a loop that tries three times with a 1/4 second + ;; between tries? + (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0)) + (pid2 (rmt:test-get-top-process-pid run-id test-id)) + (pids (delete-duplicates (filter number? (list pid1 pid2))))) + (if (not (null? pids)) + (begin + (for-each + (lambda (pid) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") + (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) + ;; (if (process:alive? pid) + ;; (begin + (map (lambda (pid-num) + (process-signal pid-num signal/term)) + (process:get-sub-pids pid)) + (thread-sleep! 5) + ;; (if (process:process-alive? pid) + (map (lambda (pid-num) + (handle-exceptions + exn + #f + (process-signal pid-num signal/kill))) + (process:get-sub-pids pid)))) + ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) + pids) + ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? + (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt + (begin + (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) + (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt + ))) + (mutex-unlock! m) + ;; no point in sticking around. Exit now. But run end of run before exiting? + (launch:end-of-run-check run-id) + (exit))) + (if (hash-table-ref/default misc-flags 'keep-going #f) + (begin + (thread-sleep! 
3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses + (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta + (loop (calc-minutes) + (or new-cpu-load cpu-load) + (or new-disk-free disk-free) + (if do-sync (current-seconds) last-sync))))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional + + +;; launch:execute - run one test from an encoded command record. +;; Decodes encoded-cmd with common:read-encoded-string and exports it as MT_CMDINFO; nothing further happens unless the result is a list. +;; Then: sets the area env vars and runs launch:setup, installs int/term signal handlers, gates on the test's current state, +;; clears old steps, loads runconfigs into the environment, validates/enters work-area, exports the MT_ variables, +;; writes .testconfig, and runs launch:monitor-job and launch:manage-steps in two threads. +;; After the run thread finishes it records the final state/status, summarizes, calls launch:end-of-run-check, +;; and exits with code 4 when exit-info's exit-status is false. +(define (launch:execute encoded-cmd) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) + (tconfigreg #f)) + (setenv "MT_CMDINFO" encoded-cmd) + ;;(bb-check-path msg: "launch:execute incoming") + (if (list? 
cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) + ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area + (top-path (assoc/default 'toppath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (ezsteps (assoc/default 'ezsteps cmdinfo)) + (subrun (assoc/default 'subrun cmdinfo)) + ;; (runremote (assoc/default 'runremote cmdinfo)) + ;; (transport (assoc/default 'transport cmdinfo)) ;; not used + ;; (serverinf (assoc/default 'serverinf cmdinfo)) + ;; (port (assoc/default 'port cmdinfo)) + (serverurl (assoc/default 'serverurl cmdinfo)) + (homehost (assoc/default 'homehost cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (test-id (assoc/default 'test-id cmdinfo)) + (target (assoc/default 'target cmdinfo)) + (areaname (assoc/default 'areaname cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar + 
(runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (runtlim (assoc/default 'runtlim cmdinfo)) + (contour (assoc/default 'contour cmdinfo)) + (item-path (item-list->path itemdat)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (keys #f) + (keyvals #f) + (fullrunscript (if (not runscript) + #f + (if (substring-index "/" runscript) + runscript ;; use unadultered if contains slashes + (let ((fulln (conc work-area "/" runscript))) + (if (and (common:file-exists? fulln) + (file-execute-access? fulln)) + fulln + runscript))))) ;; assume it is on the path + (check-work-area (lambda () + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:directory-exists? work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) + (begin + (debug:print 0 *default-log-port* + "INFO: we are expecting to be in directory " work-area "\n" + " but we are actually in the directory " (current-directory) "\n" + " doing another change dir.") + (change-directory work-area))) + + ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. + (let ((files (glob (conc testpath "/*"))) + (bad-files '())) + (for-each + (lambda (fullname) + (let* ((fname (pathname-strip-directory fullname)) + (targn (conc work-area "/" fname))) + (if (not (file-exists? targn)) + (set! bad-files (cons fname bad-files))))) + files) + (if (not (null? bad-files)) + (begin + (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. 
Re-running the copy command.") + (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) + (launch:test-copy testpath work-area)))) + ;; one more time, change to the work-area directory + (change-directory work-area))) + ) ;; let* + + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! *toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + ;; NOTE(review): the stanza below repeats the preceding one (MT_CONTOUR .. launch:setup) verbatim - looks like a merge artifact; confirm before removing + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! *toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (if (eq? signum signal/stop) + (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) + (set! *time-to-exit* #t) + (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . 
Please wait...") + (let ((th1 (make-thread (lambda () + (print "set test to COMPLETED/ABORT begin.") + (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") + (print "set test to COMPLETED/ABORT complete.") + (print "Killed by signal " signum ". Exiting") + (exit 1)))) + (th2 (make-thread (lambda () + (thread-sleep! 
20) + (debug:print 0 *default-log-port* "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand) + ) ;; (set-signal-handler! signal/stop sighand) + + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) + (test-host (if test-info + (db:test-get-host test-info) + (begin + (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") + (exit)))) + (test-pid (db:test-get-process_id test-info))) + (cond + ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. + ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun + (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + + (rmt:general-call 'set-test-start-time #f test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) ;; prime it for running + ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) + ;; NOTE(review): when the process is alive this only logs and falls through (no exit); when it is NOT alive it exits - branches look inverted, confirm intent + (if (process:alive-on-host? test-host test-pid) + (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") + (exit))) + ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + ;; (tests:test-force-state-status! 
run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:general-call 'set-test-start-time #f test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) + (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) + (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") + (exit)))) + + ;; cleanup prior execution's steps + (rmt:delete-steps-for-test! run-id test-id) + + (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) + (set! keys (rmt:get-keys)) + ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup force-reread: #t)) + (begin + (debug:print 0 *default-log-port* "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + ;; validate that the test run area is available + (check-work-area) + + ;; still need to go back to run area home for next couple steps + (change-directory *toppath*) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) + (wconfig (configf:read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists + ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each + (lambda (section) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? 
val)) + (begin + (safe-setenv var (configf:eval-string-in-environment val))) ;; val) + (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + ;;(bb-check-path msg: "launch:execute post block 1") + + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:file-exists? work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + ;; now we can switch to the work-area? + (change-directory work-area) + ;;(bb-check-path msg: "launch:execute post block 1.5") + ;; (change-directory work-area) + (set! keyvals (keys:target->keyval keys target)) + ;; apply pre-overrides before other variables. The pre-override vars must not + ;; clobbers things from the official sources such as megatest.config and runconfigs.config + (if (string? set-vars) + (let ((varpairs (string-split set-vars ","))) + (debug:print 4 *default-log-port* "varpairs: " varpairs) + (map (lambda (varpair) + (let ((varval (string-split varpair "="))) + (if (eq? (length varval) 2) + (let ((var (car varval)) + (val (cadr varval))) + (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") + (setenv var val))))) + varpairs))) + ;;(bb-check-path msg: "launch:execute post block 2") + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if val + (setenv var val) + (begin + (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. 
Exiting") + (exit))))) + (list + (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) + (list "MT_RUNNAME" runname) + (list "MT_MEGATEST" megatest) + (list "MT_TARGET" target) + (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) + ;;(bb-check-path msg: "launch:execute post block 3") + + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) + ;;(bb-check-path msg: "launch:execute post block 4") + ;; (change-directory top-path) + ;; Can setup as client for server mode now + ;; (client:setup) + + + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + ;;(bb-check-path msg: "launch:execute post block 41") + (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) + ;;(bb-check-path msg: "launch:execute post block 42") + (set-item-env-vars itemdat) + ;;(bb-check-path msg: "launch:execute post block 43") + (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) + (if blacklist + (let ((vars (string-split blacklist))) + (save-environment-as-files "megatest" ignorevars: vars) + (for-each (lambda (var) + (unsetenv var)) + vars)) + (save-environment-as-files "megatest"))) + ;;(bb-check-path msg: "launch:execute post block 44") + ;; open-run-close not needed for test-set-meta-info + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) + + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (and fullrunscript + (common:file-exists? fullrunscript) + (not (file-execute-access? 
fullrunscript))) + (system (conc "chmod ug+x " fullrunscript)))) + + ;; We are about to actually kick off the test + ;; so this is a good place to remove the records for + ;; any previous runs + ;; (db:test-remove-steps db run-id testname itemdat) + ;; now is also a good time to write the .testconfig file + (let* ((tconfig-fname (conc work-area "/.testconfig")) + (tconfig-tmpfile (conc tconfig-fname ".tmp")) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) + (configf:write-alist tconfig tconfig-tmpfile) + (file-move tconfig-tmpfile tconfig-fname #t)) + ;; + (let* ((m (make-mutex)) + ;; NOTE(review): kill-job? is never assigned in launch:execute, so new-state below always resolves to "COMPLETED" - confirm + (kill-job? #f) + (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status + (job-thread #f) + ;; (keep-going #t) + (misc-flags (let ((ht (make-hash-table))) + (hash-table-set! ht 'keep-going #t) + ht)) + (runit (lambda () + (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) + (monitorjob (lambda () + (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job"))) + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (hash-table-set! misc-flags 'keep-going #f) + (thread-join! th1) + (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. + (mutex-lock! 
m) + (let* ((item-path (item-list->path itemdat)) + ;; only state and status needed - use lazy routine + (testinfo (rmt:get-testinfo-state-status run-id test-id))) + ;; Am I completed? 
+ (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status (cond + ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) + ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) + ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) + ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") + ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") + ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") + ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") + (else "FAIL")))) ;; (db:test-get-status testinfo))) + (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) + (tests:test-set-status! 
run-id + test-id + new-state + new-status + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! + )) + ;; for automated creation of the rollup html file this is a good place... + (if (not (equal? item-path "")) + (tests:summarize-items run-id test-id test-name #f)) + (tests:summarize-test run-id test-id) ;; don't force - just update if no + (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) + (mutex-unlock! m) + (launch:end-of-run-check run-id ) + (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " + work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") + (if (not (launch:einf-exit-status exit-info)) + (exit 4)))) + ))) + +;; Spec for End of test +;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup +;; At transition to run COMPLETED/X do hooks +;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND +;; we can prove the process is not alive (ssh host pstree -A pid) +;; if dead safe to mark the test as killed in the db +;; State/status table +;; new +;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) 
==> COMPLETED / X where X is same as itemized rollup
+;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
+;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
+;; 0 RUNNING ==> this is actually the first condition, should not get here
+
+;; launch:end-of-run-check - roll up the run's state/status and decide what to do next.
+;;
+;;   run-id : id of the run to inspect
+;;
+;; Always calls rmt:set-state-status-and-roll-up-run with the run's current
+;; state/status, then:
+;;   * launch marker var is "yes" and nothing is incomplete -> runs:run-post-hook
+;;   * more than 3 tests running                            -> log only
+;;   * 1..3 tests running -> launch:kill-tests-if-dead; re-check when every
+;;                           running test was flagged and launching is complete
+;;   * otherwise          -> log each test that has not completed
+;;
+;; NOTE(review): the re-check recursion assumes the running count drops once
+;; KILLREQ has been sent; otherwise it would repeat - confirm (marked todo).
+(define (launch:end-of-run-check run-id )
+  (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))
+         (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
+         ;; the "lunch-complete-" spelling is the stored key; do not "fix" it here
+         (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
+         (current-state     (rmt:get-run-state run-id))
+         (current-status    (rmt:get-run-status run-id)))
+    ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing
+    (debug:print 0 *default-log-port* "rollup run state/status")
+    (rmt:set-state-status-and-roll-up-run run-id current-state current-status)
+
+    (cond
+     ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
+      (debug:print 0 *default-log-port* "look for post hook.")
+      (runs:run-post-hook run-id))
+     ((> running-cnt 3)
+      (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
+     ((> running-cnt 0)
+      (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
+      (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
+        (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt))
+            (launch:end-of-run-check run-id)))) ;;todo
+     (else
+      (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
+      (let ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
+        ;; for-each is a no-op on an empty list, so no explicit length guard is needed
+        (for-each
+         (lambda (running-test)
+           (let ((test-name (vector-ref running-test 2))
+                 (item-path (vector-ref running-test 11)))
+             (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")))
+         not-completed-tests))))))
+
+;; launch:is-test-alive - ask the test's host whether pid still exists.
+;;
+;; Runs "ssh <host> pstree -A <pid>" and treats any output as "alive".
+;; Returns #t when host or pid is missing or host is "n/a", i.e. when
+;; liveness cannot be checked the test is assumed alive.
+;;
+;; NOTE(review): host and pid are interpolated into a shell command; this
+;; presumably relies on them coming from the megatest db - confirm.
+(define (launch:is-test-alive host pid)
+  (if (and host pid (not (equal? host "n/a")))
+      (let* ((cmd    (conc "ssh " host " pstree -A " pid))
+             (output (with-input-from-pipe cmd read-lines)))
+        (print "cmd: " cmd "\n op: " output )
+        (not (null? output)))
+      #t))
+
+;; launch:kill-tests-if-dead - send KILLREQ to tests of run-id that look dead.
+;;
+;; A test in RUNNING, LAUNCHED or REMOTEHOSTSTART is considered dead when its
+;; event-time + duration is more than 600 seconds (10 min) in the past AND
+;; launch:is-test-alive reports that the process is gone.
+;;
+;; Returns the number of tests that were sent KILLREQ. Returns 0 when the
+;; query yields no tests; previously the unconditional (car running-tests)
+;; raised an error on an empty list.
+(define (launch:kill-tests-if-dead run-id)
+  (let ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
+    (let loop ((remaining running-tests)
+               (kill-cnt  0))
+      (if (null? remaining)
+          kill-cnt
+          (let* ((running-test (car remaining))
+                 (test-name    (vector-ref running-test 2))
+                 (item-path    (vector-ref running-test 11))
+                 (test-id      (vector-ref running-test 0))
+                 (host         (vector-ref running-test 6))
+                 (pid          (rmt:test-get-top-process-pid run-id test-id))
+                 (event-time   (vector-ref running-test 5))
+                 (duration     (vector-ref running-test 12))
+                 (curr-time    (current-seconds))
+                 ;; test has not updated duration in last 10 min then likely its not running
+                 ;; but confirm before marking it as killed
+                 (dead?        (and (< (+ event-time duration 600) curr-time)
+                                    (not (launch:is-test-alive host pid)))))
+            (if dead?
+                (begin
+                  (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
+                  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
+            (loop (cdr remaining)
+                  (if dead? (+ kill-cnt 1) kill-cnt)))))))
+
+;; DO NOT USE - caching of configs is handled in launch:setup now.
+;;
+;; launch:cache-config - when *configdat* is loaded and one of -run, -runtests
+;; or -execute was given, write *configdat* to
+;; <linktree>/<target>/<runname>/.megatest.cfg.<seconds> and symlink it to the
+;; versioned .megatest.cfg-<version>-<hash> name. The write is skipped inside a
+;; running test; nothing is done unless the .runconfig.<version>-<hash> file
+;; already exists in that directory.
+(define (launch:cache-config)
+  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
+  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
+  (if (and *configdat*
+           (or (args:get-arg "-run")
+               (args:get-arg "-runtests")
+               (args:get-arg "-execute")))
+      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
+             (target   (common:args-get-target exit-if-bad: #t))
+             (runname  (or (args:get-arg "-runname")
+                           (args:get-arg ":runname")
+                           (getenv "MT_RUNNAME")))
+             (fulldir  (conc linktree "/"
+                             target "/"
+                             runname)))
+        (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
+            (begin
+              (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
+              (if (not (common:file-exists? fulldir))
+                  (create-directory fulldir #t)) ;; need to protect with exception handler
+              (if (and target
+                       runname
+                       (common:file-exists? fulldir))
+                  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
+                        (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash))
+                        (rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
+                    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
+                        (begin
+                          (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
+                          (if (not (common:in-running-test?))
+                              (configf:write-alist *configdat* tmpfile))
+                          (system (conc "ln -sf " tmpfile " " targfile)))))))
+            (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))
+
+
+;; gather available information, if legit read configs in this order:
+;;
+;; if have cache;
+;; read it and return it
+;; else
+;; megatest.config (do not cache)
+;; runconfigs.config (cache if all vars avail)
+;; megatest.config (cache if all vars avail)
+;; returns:
+;; *toppath*
+;; side effects:
+;; sets; *configdat* (megatest.config info)
+;; *runconfigdat* (runconfigs.config info)
+;; *configstatus* (status of the read data)
+;;
+;; Calls are serialized with *launch-setup-mutex*. The lock is held through
+;; dynamic-wind so it is released even when launch:setup-body raises;
+;; previously an exception left the mutex locked and any later caller blocked.
+;;
+;;   force-reread : when true, always go through launch:setup-body
+;;   areapath     : passed through to launch:setup-body
+(define (launch:setup #!key (force-reread #f) (areapath #f))
+  (dynamic-wind
+   (lambda () (mutex-lock! *launch-setup-mutex*))
+   (lambda ()
+     (if (and *toppath*
+              (eq? *configstatus* 'fulldata)
+              (not force-reread)) ;; got it all
+         (begin
+           (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
+           *toppath*)
+         (launch:setup-body force-reread: force-reread areapath: areapath)))
+   (lambda () (mutex-unlock! *launch-setup-mutex*))))
+
+;; return paths depending on what info is available.
+;;
+;; Result is a pair (mtcachef . rccachef): the cache file names for
+;; megatest.config and runconfigs.config inside the test directory when it is
+;; usable, else inside the run directory; either element is #f when no cache
+;; directory could be determined.
+;; NOTE(review): areapath, toppath and mtconfig are accepted but not used, and
+;; use-cache is bound but never read - kept in case common:use-cache? has
+;; side effects; confirm.
+(define (launch:get-cache-file-paths areapath toppath target mtconfig)
+  (let* ((use-cache (common:use-cache?))
+         (runname   (common:args-get-runname))
+         (linktree  (common:get-linktree))
+         (testname  (common:get-full-test-name))
+         ;; presumably common:directory-writable? yields the directory or #f - confirm
+         (rundir    (and runname target linktree
+                         (common:directory-writable? (conc linktree "/" target "/" runname))))
+         (testdir   (and rundir testname
+                         (common:directory-writable? (conc rundir "/" testname))))
+         (cachedir  (or testdir rundir))
+         (mtcachef  (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
+         (rccachef  (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))))
+    (debug:print-info 6 *default-log-port*
+                      "runname=" runname
+                      "\n linktree=" linktree
+                      "\n testname=" testname
+                      "\n rundir=" rundir
+                      "\n testdir=" testdir
+                      "\n cachedir=" cachedir
+                      "\n mtcachef=" mtcachef
+                      "\n rccachef=" rccachef)
+    (cons mtcachef rccachef)))
+
+(define (launch:setup-body #!key (force-reread #f) (areapath #f)) + (if (and (eq? *configstatus* 'fulldata) + *toppath* + (not force-reread)) ;; no need to reprocess + *toppath* ;; return toppath + (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. + (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath + (target (common:args-get-target)) + (sections (if target (list "default" target) #f)) ;; for runconfigs + (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config + (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... + (mtcachef (if (null? cachefiles) + #f + (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) + (rccachef (if (null? cachefiles) + #f + (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) + ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) + (set! 
*toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource + ;;(BB> "launch:setup-body -- cachefiles="cachefiles) + (cond + ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME + ((and (not force-reread) + mtcachef rccachef + use-cache + (get-environment-variable "MT_RUN_AREA_HOME") + (common:file-exists? mtcachef) + (common:file-exists? rccachef)) + ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") + (set! *configdat* (configf:read-alist mtcachef)) + ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) + (set! *runconfigdat* (configf:read-alist rccachef)) + (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) + (set! *configstatus* 'fulldata) + (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) + *toppath*) + ;; there are no existing cached configs, do full reads of the configs and cache them + ;; we have all the info needed to fully process runconfigs and megatest.config + ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? + mtcachef + rccachef) ;; BB- why are we doing this without asking if caching is desired? + ;;(BB> "launch:setup-body -- cond branch 2") + (let* ((first-pass (configf:find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect + mtconfig + environ-patt: "env-override" + given-toppath: toppath + pathenvvar: "MT_RUN_AREA_HOME")) + (first-rundat (let ((toppath (if toppath + toppath + (car first-pass)))) + (configf:read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. + (conc (if (string? toppath) + toppath + (get-environment-variable "MT_RUN_AREA_HOME")) + "/runconfigs.config") + *runconfigdat* #t + sections: sections)))) + (set! *runconfigdat* first-rundat) + (if first-pass ;; + (begin + ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") + (set! 
*configdat* (car first-pass)) + ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) + (set! *configinfo* first-pass) + (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it + (set! toppath *toppath*) + (if (not *toppath*) + (begin + (debug:print-error 0 *default-log-port* "you are not in a megatest area!") + (exit 1))) + (setenv "MT_RUN_AREA_HOME" *toppath*) + ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it + (let* ((keys (rmt:get-keys)) + (key-vals (keys:target->keyval keys target)) + (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) + ; (if *configdat* + ; (configf:lookup *configdat* "setup" "linktree") + ; (conc *toppath* "/lt")))) + (second-pass (configf:find-and-read-config + mtconfig + environ-patt: "env-override" + given-toppath: toppath + pathenvvar: "MT_RUN_AREA_HOME")) + (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config + (for-each (lambda (kt) + (setenv (car kt) (cadr kt))) + key-vals) + (configf:read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... + sections: sections))) + (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. 
+ + (if rccachef + (common:fail-safe + (lambda () + (configf:write-alist runconfigdat rccachef)) + (conc "Could not write cache file - "rccachef))) + (if mtcachef + (common:fail-safe + (lambda () + (configf:write-alist *configdat* mtcachef)) + (conc "Could not write cache file - "mtcachef))) + (set! *runconfigdat* runconfigdat) + (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) + ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table + (set! *configdat* (make-hash-table)) + ))) + + ;; else read what you can and set the flag accordingly + ;; here we don't have either mtconfig or rccachef + (else + ;;(BB> "launch:setup-body -- cond branch 3 - else") + (let* ((cfgdat (configf:find-and-read-config + (or (args:get-arg "-config") "megatest.config") + environ-patt: "env-override" + given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") + pathenvvar: "MT_RUN_AREA_HOME"))) + + (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) + (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) + (rdat (configf:read-config (conc toppath ;; convert this to use runconfig:read! + "/runconfigs.config") *runconfigdat* #t sections: sections))) + (set! *configinfo* cfgdat) + (set! *configdat* (car cfgdat)) + (set! *runconfigdat* rdat) + (set! *toppath* toppath) + (set! *configstatus* 'partial)) + (begin + (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") + (exit 2)))))) + ;; COND ends here. + + ;; additional house keeping + (let* ((linktree (or (common:get-linktree) + (conc *toppath* "/lt")))) + (if linktree + (begin + (if (not (common:file-exists? 
linktree)) + (begin + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (exit 1)) + (create-directory linktree #t)))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) + (let ((tlink (conc *toppath* "/lt"))) + (if (not (common:file-exists? tlink)) + (create-symbolic-link linktree tlink))))) + (begin + (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") + ))) + (if (and *toppath* + (directory-exists? *toppath*)) + (begin + (setenv "MT_RUN_AREA_HOME" *toppath*) + (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) + (begin + (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") + (set! *toppath* #f) ;; force it to be false so we return #f + #f)) + + ;; one more attempt to cache the configs for future reading + (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. + (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) + (common:fail-safe + (lambda () + (configf:write-alist *runconfigdat* rccachef)) + (conc "Could not write cache file - "rccachef)) + ) + (if (and mtcachef *configdat* (not (common:file-exists? 
mtcachef)))
+          (common:fail-safe
+           (lambda ()
+             (configf:write-alist *configdat* mtcachef))
+           (conc "Could not write cache file - "mtcachef))
+          )
+      (if (and rccachef mtcachef *runconfigdat* *configdat*)
+          (set! *configstatus* 'fulldata)))
+
+    ;; if have -append-config then read and append here
+    (let ((cfname (args:get-arg "-append-config")))
+      (if (and cfname
+               (file-read-access? cfname))
+          (configf:read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
+    *toppath*)))
+
+;; Choose the directory under which a test's work area will be created.
+;;
+;;   confdat    - config hash table; its "disks" entry and the "minspace"
+;;                value of its "setup" section are read
+;;   testconfig - testconfig hash table or #f; its "disks" entry, when
+;;                present, takes precedence over the one in confdat
+;;
+;; Returns a directory path, or #f when no "disks" entry exists at all.
+;; launch-test hands the result to create-work-area as disk-path, where it
+;; is concatenated into path strings, so every non-#f result must be a
+;; plain path rather than a (free-space . path) style pair, and the
+;; "nothing configured" case must be a real #f so that the caller's
+;; tmp_run fallback is reachable (an unspecified value is truthy).
+;;
+;; the code creates the necessary directories if it does not exist and returns the path.
+;;
+(define (get-best-disk confdat testconfig)
+  (let* ((disks    (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
+                       (hash-table-ref/default confdat "disks" #f)))
+         (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
+                     (string->number (or m "10000"))))
+         (fallback (conc *toppath* "/runs"))) ;; last resort when no configured disk is usable
+    (if (not disks)
+        #f
+        (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
+          (cond
+           (res (cdr res)) ;; presumably res is (free-space . path) -- TODO confirm against common:get-disk-with-most-free-space
+;;         (if (common:low-noise-print 20 "No valid disks or no disk with enough space")
+;;             (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n    You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
+           ;;(exit 1)
+           ((null? disks) fallback)
+           (else
+            ;; no disk was selected: try to create the configured directories,
+            ;; longest path first, and use the first one that can be created
+            (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
+              (let loop ((head (car paths)) (tail (cdr paths)))
+                (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t))))
+                  (cond
+                   (result result)
+                   ((null? tail) fallback)
+                   (else (loop (car tail) (cdr tail)))))))))))))
+
+
+;; Copy the test source tree into the test run area.
+;;
+;;   test-src-path - directory holding the test sources
+;;   test-path     - destination run directory
+;;
+;; The command comes from "testcopycmd" in the "setup" section of
+;; *configdat* when that is set (TEST_SRC_PATH and TEST_TARG_PATH are
+;; replaced in it); otherwise rsync is used and its output is appended to
+;; mt_launch.log in the destination. The command is run with `system`; a
+;; non-zero status is only reported through debug:print at level 2.
+;;
+(define (launch:test-copy test-src-path test-path)
+  (let* ((template    (configf:lookup *configdat* "setup" "testcopycmd"))
+         (copy-cmd    (if template
+                          ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
+                          (string-substitute "TEST_TARG_PATH" test-path
+                                             (string-substitute "TEST_SRC_PATH" test-src-path template #t)
+                                             #t)
+                          ;; default copy: quiet rsync unless debug mode 1 is active
+                          (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
+                                " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
+         (exit-status (system copy-cmd)))
+    (unless (eq? exit-status 0)
+      (debug:print 2 *default-log-port* "ERROR: problem with running \"" copy-cmd "\""))))
+
+
+;; Desired directory structure:
+;;
+;;  <linkdir> - <target> - <testname> -.
+;;                                      |
+;;                                      v
+;;  <rundir>  - <target> - <testname> -|- <itempath(s)>
+;;
+;;  dir stored in test is:
+;;
+;;  <linkdir> - <target> - <testname> [ - <itempath> ]
+;;
+;; All log file links should be stored relative to the top of link path
+;;
+;; <target> - <testname> [ - <itempath> ]
+;;
+(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
+  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
+         (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
+                        run-info
+                        (db:get-value-by-header (db:get-rows run-info)
+                                                (db:get-header run-info)
+                                                "runname")))
+         (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
+         ;; convert back to db: from rdb: - this is always run at server end
+         (target   (string-intersperse (map cadr keyvals) "/"))
+
+         (not-iterated (equal?
"" item-path)) + + ;; all tests are found at /test-base or /test-base + (testtop-base (conc target "/" runname "/" testname)) + (test-base (conc testtop-base (if not-iterated "" "/") item-path)) + + ;; nb// if itempath is not "" then it is prefixed with "/" + (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base)) + (test-path (conc disk-path (if contour (conc "/" contour) "") "/" test-base)) + + ;; ensure this exists first as links to subtests must be created there + (linktree (common:get-linktree)) + ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree"))) + ;; (if rd rd (conc *toppath* "/runs")))) + ;; which seems wrong ... + + (lnkbase (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname)) + (lnkpath (conc lnkbase "/" testname)) + (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) + (lnktarget (conc lnkpath "/" item-path))) + + ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical + ;; rundir shortdir + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id) + + (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) + (if (not (common:file-exists? linktree)) + (begin + (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree) + (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) + ;; create the directory for the tests dir links, this is needed no matter what... try up to three times + (let loop ((done 3)) + (let ((success (if (and (not (common:directory-exists? lnkbase)) + (not (common:file-exists? 
lnkbase))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase) + (print-error-message exn (current-error-port)) + #t) + (create-directory lnkbase #t) + #f)))) + (if (and (not success)(> done 0)) + (loop (- done 1))))) + + ;; update the toptest record with its location rundir, cache the path + ;; This wass highly inefficient, one db write for every subtest, potentially + ;; thousands of unnecessary updates, cache the fact it was set and don't set it + ;; again. + + ;; Now create the link from the test path to the link tree, however + ;; if the test is iterated it is necessary to create the parent path + ;; to the iteration. use pathname-directory to trim the path by one + ;; level + (if (not not-iterated) ;; i.e. iterated + (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) + (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory iterated-parent #t)))) + + (if (symbolic-link? lnkpath) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (delete-file lnkpath))) + + (if (not (or (common:file-exists? lnkpath) + (symbolic-link? lnkpath))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-symbolic-link toptest-path lnkpath))) + + ;; NB - This was not working right - some top tests are not getting the path set!!! + ;; + ;; Do the setting of this record after the paths are created so that the shortdir can + ;; be set to the real directory location. 
This is safer for future clean up if the link + ;; tree is damaged or lost. + ;; + (if (not (hash-table-ref/default *toptest-paths* testname #f)) + (let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path)) + (curr-test-path (if testinfo ;; (filedb:get-path *fdb* + ;; (db:get-path dbstruct + ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir testinfo) ;; ) ;; ) + #f))) + (hash-table-set! *toptest-paths* testname curr-test-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath + (if (common:file-exists? lnkpath) + ;; (resolve-pathname lnkpath) + (common:nice-path lnkpath) + lnkpath) + testname "" run-id) + ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) + (if (or (not curr-test-path) + (not (directory-exists? toptest-path))) + (begin + (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) + (handle-exceptions + exn + #f ;; don't care to catch and deal with errors here for now. + (create-directory toptest-path #t)) + (hash-table-set! *toptest-paths* testname toptest-path))))) + + ;; The toptest path has been created, the link to the test in the linktree has + ;; been created. Now, if this is an iterated test the real test dir must be created + (if (not not-iterated) ;; this is an iterated test + (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) + (debug:print 2 *default-log-port* "Setting up sub test run area") + (debug:print 2 *default-log-port* " - creating run area in " test-path) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory test-path #t)) + (debug:print 2 *default-log-port* + " - creating link from: " test-path "\n" + " to: " lnktarget) + + ;; If there is already a symlink delete it and recreate it. 
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
+             (exit 1)) ;; failure: exit non-zero like the other handlers in this function
+           (if (symbolic-link? lnktarget)     (delete-file lnktarget))
+           (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
+
+    (if (not (directory? test-path))
+        (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes
+
+    (if (and test-src-path (directory? test-path))
+        (begin
+          (launch:test-copy test-src-path test-path)
+          (list lnkpathf lnkpath ))
+        (if (and test-src-path (> remtries 0))
+            (begin
+              (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
+              ;;
+              (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
+            (list #f #f)))))
+
+
+;; Rate-limited zombie scan for one run.
+;;
+;; The time of the last scan is kept in the rmt variable
+;; "zombiescan-runid-<run-id>". A scan is done when that variable is unset
+;; or older than twice the "deadtime" value of the "setup" section
+;; (120 is used when it is not configured). When a scan is due the
+;; variable is refreshed and rmt:find-and-mark-incomplete is called for
+;; the run; otherwise nothing happens.
+;;
+(define (launch:handle-zombie-tests run-id)
+  (let* ((key       (conc "zombiescan-runid-"run-id))
+         (now       (current-seconds))
+         (deadtime  (or (configf:lookup-number *configdat* "setup" "deadtime") 120))
+         (threshold (- now (* 2 deadtime)))
+         (val       (rmt:get-var key))
+         (do-scan?  (or (not val)             ;; never scanned
+                        (< val threshold))))  ;; last scan is too old
+    (when do-scan?
+      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
+      ;; fresh timestamp: rmt:get-var above may have taken a while
+      (rmt:set-var key (current-seconds))
+      (rmt:find-and-mark-incomplete run-id #f))))
+
+
+
+
+
+;; 1. look through disks list for disk with most space
+;; 2. create run dir on disk, path name is meaningful
+;; 3. create link from run dir to megatest runs area
+;; 4. remotely run the test on allocated host
+;;    - could be ssh to host from hosts table (update regularly with load)
+;;    - could be netbatch
+;;      (launch-test db (cadr status) test-conf))
+(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
+  (mutex-lock!
*launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex + (let* ( ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (item-path (item-list->path itemdat)) + (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) + (let loop ((delta (- (current-seconds) *last-launch*)) + (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) + (if (> launch-delay delta) + (begin + (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. + (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) + (thread-sleep! 
(- launch-delay delta)) + (loop (- (current-seconds) *last-launch*) launch-delay)))) + (change-directory *toppath*) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) + (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + (list "MT_CONTOUR" contour) + ) + itemdat)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed + ;; for tconfig, why do we allow fallback to test-conf? + (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) + (begin + (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") + test-conf))) ;; force re-read now that all vars are set + (useshell (let ((ush (configf:lookup *configdat* "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (runscript (configf:lookup tconfig "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag + (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun + ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) + ;; (memory (configf:lookup tconfig "requirements" "memory")) + ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed + (remote-megatest (configf:lookup *configdat* "setup" "executable")) + (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") + (configf:lookup *configdat* "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. 
Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher")) + (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + ;; (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! 
launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) + ;; (pp (hash-table->alist tconfig)) + (set! diskpath (get-best-disk *configdat* tconfig)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 *default-log-port* "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! 
cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + ;; (list 'transport (conc *transport-type*)) + ;; (list 'serverinf *server-info*) + (list 'homehost (let* ((hhdat (common:get-homehost))) + (if hhdat + (car hhdat) + #f))) + (list 'serverurl (if *runremote* + (remote-server-url *runremote*) + #f)) ;; + (list 'areaname (common:get-testsuite-name)) + (list 'toppath *toppath*) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'subrun subrun) + (list 'target mt_target) + (list 'contour contour) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway + (if (common:file-exists? work-area) + (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir + (cond + ;; ((and launcher hosts) ;; must be using ssh hostname + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! 
fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 *default-log-port* "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 *default-log-port* "fullcmd: " fullcmd) + (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default *configdat* "env-override" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + (testprevvals (alist->env-vars + (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) + (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. 
+ process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd)))) + (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) + (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) + (if (not success) + (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) + (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file "mt_launch.log" + (lambda () + (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 *default-log-port* "Launching completed, updating db") + (debug:print 2 *default-log-port* "Launch results: " launch-results) + (if (not launch-results) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? 
+            (process-signal (current-process-id) signal/kill)
+            ))
+      (alist->env-vars miscprevvals)
+      (alist->env-vars testprevvals)
+      (alist->env-vars commonprevvals)
+      launch-results))
+    (change-directory *toppath*)))
+
+;; recover a test where the top controlling mtest may have died
+;; As written this only looks up the test's top process pid, probes for it with ps/grep and then blocks in process-wait; the recovery steps listed below are not implemented yet.
+(define (launch:recover-test run-id test-id)
+  ;; this function is called on the test run host via ssh
+  ;;
+  ;; 1. look at the process from pid
+  ;;    - is it owned by calling user
+  ;;    - is its run directory correct for the test
+  ;;    - is there a controlling mtest (maybe stuck)
+  ;; 2. if recovery is needed watch pid
+  ;;    - when it exits take the exit code and do the needful
+  ;;
+  (let* ((pid   (rmt:test-get-top-process-pid run-id test-id))
+         (psres (with-input-from-pipe ;; first line of the ps listing that matches pid, or an eof object when nothing matches
+                 (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'")
+                 (lambda ()
+                   (read-line))))
+         (rundir (if (string? psres) ;; real process owned by user
+                     (read-symbolic-link (conc "/proc/" pid "/cwd")) ;; NOTE(review): rundir is computed but never used below
+                     #f)))
+    ;; now wait on that process if all is correct
+    ;; periodically update the db with runtime
+    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
+    ;; state/status appropriately
+    (process-wait pid))) ;; NOTE(review): runs even when psres/rundir show no matching process -- confirm this is intended
+
+
+;; Do not rpc this one, do the underlying calls!!!
+(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
+  (let* ((real-status status)
+         (otherdat    (if dat dat (make-hash-table)))
+         (testdat     (rmt:get-test-info-by-id run-id test-id))
+         (test-name   (db:test-get-testname  testdat))
+         (item-path   (db:test-get-item-path testdat))
+         ;; before proceeding we must find out if the previous test (where all keys matched except runname)
+         ;; was WAIVED if this test is FAIL
+
+         ;; NOTES:
+         ;;  1. Is the call to test:get-previous-run-record remotified?
+         ;;  2. Add test for testconfig waiver propagation control here
+         ;;
+         (prev-test   (if (equal?
status "FAIL") + (rmt:get-previous-test-run-record run-id test-name item-path) + #f)) + (waived (if prev-test + (if prev-test ;; true if we found a previous test in this run series + (let ((prev-status (db:test-get-status prev-test)) + (prev-state (db:test-get-state prev-test)) + (prev-comment (db:test-get-comment prev-test))) + (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment) + (if (and (equal? prev-state "COMPLETED") + (equal? prev-status "WAIVED")) + (if comment + comment + prev-comment) ;; waived is either the comment or #f + #f)) + #f) + #f))) + (if (and waived + (tests:check-waiver-eligibility testdat prev-test)) + (set! real-status "WAIVED")) + + (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) + + ;; update the primary record IF state AND status are defined + (if (and state status) + (begin + (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)) + ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status + )) + + ;; if status is "AUTO" then call rollup (note, this one modifies data in test + ;; run area, it does remote calls under the hood. + ;; (if (and test-id state status (equal? status "AUTO")) + ;; (rmt:test-data-rollup run-id test-id status)) + + ;; add metadata (need to do this way to avoid SQL injection issues) + + ;; :first_err + ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) + ;; (if val + ;; (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) + ;; + ;; ;; :first_warn + ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f))) + ;; (if val + ;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? 
AND item_path=?;" val run-id test-name item-path))) + + (let ((category (hash-table-ref/default otherdat ":category" "")) + (variable (hash-table-ref/default otherdat ":variable" "")) + (value (hash-table-ref/default otherdat ":value" #f)) + (expected (hash-table-ref/default otherdat ":expected" "n/a")) + (tol (hash-table-ref/default otherdat ":tol" "n/a")) + (units (hash-table-ref/default otherdat ":units" "")) + (type (hash-table-ref/default otherdat ":type" "")) + (dcomment (hash-table-ref/default otherdat ":comment" ""))) + (debug:print 4 *default-log-port* + "category: " category ", variable: " variable ", value: " value + ", expected: " expected ", tol: " tol ", units: " units) + (if (and value) ;; require only value; BB was- all three required + (let ((dat (conc category "," + variable "," + value "," + expected "," + tol "," + units "," + dcomment ",," ;; extra comma for status + type ))) + ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. + (rmt:csv->test-data run-id test-id + dat) + ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start" + ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue. + (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :) + ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server. + ))) + + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;;;;;; (if (not (equal? item-path "")) + ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) + + (if (or (and (string? 
comment) + (string-match (regexp "\\S+") comment)) + waived) + (let ((cmt (if waived waived comment))) + (rmt:general-call 'set-test-comment run-id cmt test-id))))) + ) Index: megamod.scm ================================================================== --- megamod.scm +++ megamod.scm @@ -147,11 +147,10 @@ ;;====================================================================== ;; L O C K I N G M E C H A N I S M S ;;====================================================================== ;; globals -(define *writes-total-delay* 0) (define *exit-started* #f) (define *last-monitor-update-time* 0) (define *default-log-port* (current-error-port)) ;; The watchdog is to keep an eye on things like db sync etc. @@ -173,11 +172,11 @@ (include "archive-inc.scm") (include "client-inc.scm") (include "common-inc.scm") ;; L5 (include "db-inc.scm") ;; L4 (include "env-inc.scm") -(include "http-transport-inc.scm") +;; (include "http-transport-inc.scm") ;; (include "items-inc.scm") ;; (include "keys-inc.scm") (include "launch-inc.scm") ;; L1 ;; (include "margs-inc.scm") (include "mt-inc.scm") Index: mtmod.scm ================================================================== --- mtmod.scm +++ mtmod.scm @@ -21,20 +21,36 @@ (declare (unit mtmod)) (declare (uses mtconfigf)) (declare (uses commonmod)) (declare (uses rmtmod)) (declare (uses testsmod)) +(declare (uses tasksmod)) +(declare (uses mtargs)) +(declare (uses dbmod)) +(declare (uses pgdbmod)) +(declare (uses servermod)) +(declare (uses launchmod)) (module mtmod * (import scheme chicken data-structures extras posix) (import (prefix mtconfigf configf:) + (prefix sqlite3 sqlite3:) + srfi-69 regex srfi-18 srfi-13 srfi-1 commonmod rmtmod - testsmod) + testsmod + tasksmod + dbmod + pgdbmod + servermod + (prefix mtargs args:)) + +(include "run_records.scm") +(include "db_records.scm") ;; This is the Megatest API. All generally "useful" routines will be wrapped or extended ;; here. 
@@ -976,14 +992,10 @@
 ;;======================================================================
 ;; E X I T H A N D L I N G
 ;;======================================================================
-(define (common:run-sync?)
-  (and (common:on-homehost?)
-       (args:get-arg "-server")))
-
 ;; (let ((ohh (common:on-homehost?))
 ;;       (srv (args:get-arg "-server")))
 ;;   (and ohh srv)))
 ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
@@ -1191,7 +1203,1246 @@
 ;; '())
 )
 (if (common:api-changed?)
     (common:set-last-run-version)))
+;; called in megatest.scm, host-port is string hostname:port
+;;
+;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;;       in the same process as the server.
+;;
+;; Ping the server at HOST-PORT-IN by connecting over the http transport and
+;; attempting a login.
+;;
+;;   host-port-in : "hostname:port" string, or #f
+;;   do-exit      : when true, exit the process with status 0 (login ok) or
+;;                  1 (bad/missing host:port, or login failed) instead of returning
+;;
+;; Returns #t when the login succeeds, #f otherwise.
+;;
+(define (server:ping host-port-in #!key (do-exit #f))
+  (let* ((parts     (if host-port-in
+                        (string-split host-port-in ":")
+                        '()))
+         ;; a usable address is exactly "host:port" with a numeric port; a
+         ;; non-numeric port is rejected here instead of being handed to the
+         ;; transport as #f
+         (port-num  (and (= (length parts) 2)
+                         (string->number (cadr parts))))
+         (host-port (and port-num (list (car parts) port-num))))
+    (if (not host-port)
+        (begin
+          ;; only complain when the caller actually supplied something
+          (if host-port-in
+              (debug:print 0 *default-log-port* "ERROR: bad host:port"))
+          (if do-exit (exit 1))
+          #f)
+        (let* ((iface      (car host-port))
+               (port       (cadr host-port))
+               (server-dat (http-transport:client-connect iface port))
+               (login-res  (rmt:login-no-auto-client-setup server-dat))
+               ;; an empty list counts as a failed login rather than an error in car
+               (login-ok   (and (list? login-res)
+                                (not (null? login-res))
+                                (car login-res)
+                                #t)))
+          (if do-exit (exit (if login-ok 0 1)))
+          login-ok))))
+
+;; run ping in separate process, safest way in some cases
+;;
+;; Runs "<megatest exe> -ping IFACEPORT" and reads its output up to EOF.
+;; Returns #t only when the last line read is LOGIN_OK; no output at all, or
+;; any other last line, gives #f.
+;;
+(define (server:ping-server ifaceport)
+  (with-input-from-pipe
+   (conc (common:get-megatest-exe) " -ping " ifaceport)
+   (lambda ()
+     (let loop ((inl       (read-line))
+                (last-line "NOREPLY"))
+       (if (eof-object? inl)
+           (equal? last-line "LOGIN_OK")
+           (loop (read-line) inl))))))
+
+;; ping the given server
+;;
+;; Returns the url built by server:record->url when the ping succeeds,
+;; otherwise #f.
+;;
+(define (server:check-server server-record)
+  (let* ((server-url (server:record->url server-record))
+         (res        (case *transport-type*
+                       ((http) (server:ping server-url))
+                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
+                       ;; without an else clause the value of `case' is
+                       ;; unspecified for any other transport; that value is
+                       ;; not #f, so the server would be reported as reachable
+                       (else #f))))
+    (and res server-url)))
+
+;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;;
+;; Returns the url of the first server in the "best" list for AREAPATH that
+;; answers server:check-server, or #f.  #f is also returned, without pinging
+;; anything, when fewer servers are known than a random number below
+;; (server:get-num-servers).
+;;
+(define (server:check-if-running areapath) ;; #!key (numservers "2"))
+  (let* ((ns      (server:get-num-servers))
+         (servers (server:get-best (server:get-list areapath))))
+    ;; (print "servers: " servers " ns: " ns)
+    (cond
+     ((or (not servers) (null? servers)) #f)
+     ((and (list? servers)
+           (< (length servers) (random ns))) ;; somewhere between 0 and numservers
+      #f)
+     (else
+      (let loop ((hed (car servers))
+                 (tal (cdr servers)))
+        (or (server:check-server hed)
+            (if (null? tal)
+                #f
+                (loop (car tal) (cdr tal)))))))))
+
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;;
+;; NOTE(review): as written this returns a one-argument procedure instead of
+;; a boolean, so (server:login path) is always true and *db-last-access* is
+;; only touched if a caller invokes the returned procedure.  Left unchanged
+;; because the rpc-transport reference is not visible here - confirm the
+;; intended contract before dropping the inner lambda.
+;;
+(define (server:login toppath)
+  (lambda (toppath)
+    (set! *db-last-access* (current-seconds)) ;; might not be needed.
+    (if (equal? *toppath* toppath)
+        #t
+        #f)))
+;; Given a run id start a server process ### NOTE ### > file 2>&1
+;; if the run-id is zero and the target-host is set
+;; try running on that host
+;; incidental: rotate logs in logs/ dir.
+;;
+;; Builds the "<megatest exe> -server <target-host> ..." command line and
+;; launches it through nbfake from AREAPATH, rotating logs in a helper
+;; thread while it does so.  TARGETHOST is exported to the launcher only
+;; when the homehost is neither this host's name nor its ip address.
+;;
+;;   areapath : *toppath* for a given testsuite area
+;;
+(define (server:run areapath)
+  (let* ((curr-host   (get-host-name))
+         ;; (attempt-in-progress (server:start-attempted? areapath))
+         ;; (dot-server-url (server:check-if-running areapath))
+         (curr-ip     (server:get-best-guess-address curr-host))
+         (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
+         ;; tolerate a missing homehost record; the code below already copes
+         ;; with target-host being #f
+         (target-host (and (pair? homehost) (car homehost)))
+         (testsuite   (common:get-testsuite-name))
+         (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+         (daemonize   (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+                          " -daemonize "
+                          ""))
+         (cmdln       (conc (common:get-megatest-exe)
+                            " -server " (or target-host "-") daemonize
+                            ;; " -log " logfile
+                            " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
+         (log-rotate  (make-thread common:rotate-logs "server run, rotate logs thread"))
+         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+    ;; we want the remote server to start in *toppath* so push there
+    (push-directory areapath)
+    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+    (thread-start! log-rotate)
+
+    ;; host.domain.tld match host?
+    (if (and target-host
+             ;; look at target host, is it host.domain.tld or ip address and does it
+             ;; match current ip or hostname
+             (not (string-match (conc "(" curr-host "|" curr-host "\\..*)") target-host))
+             (not (equal? curr-ip target-host)))
+        (begin
+          (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+          (setenv "TARGETHOST" target-host)))
+
+    (setenv "TARGETHOST_LOGF" logfile)
+    ;; add a random (up to 5 seconds) initial delay. It seems pretty common
+    ;; that many running tests request a server at the same time
+    (thread-sleep! (/ (random 5000) 1000))
+    ;; do not try starting servers on an already overloaded machine, just wait forever
+    (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host)
+    (system (conc "nbfake " cmdln))
+    (unsetenv "TARGETHOST_LOGF")
+    (if (get-environment-variable "TARGETHOST") (unsetenv "TARGETHOST"))
+    (thread-join! log-rotate)
+    (pop-directory)))
+
+;; kind start up of servers: does nothing when a server already answers for
+;; AREAPATH.  Otherwise a start is attempted only if enough time has passed
+;; since the previous call for this area; the required gap grows with the
+;; number of calls (0, 20, 300, then 600 seconds, plus up to 4 seconds of
+;; jitter).  The call count and timestamp in *server-kind-run* are updated on
+;; every call that gets past the running-server check, whether or not a
+;; start was attempted.
+(define (server:kind-run areapath)
+  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
+             (call-num     (car last-run-dat))
+             (when-run     (cadr last-run-dat))
+             (run-delay    (+ (case call-num
+                                ((0) 0)
+                                ((1) 20)
+                                ((2) 300)
+                                (else 600))
+                              (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
+             (lock-file    (conc areapath "/logs/server-start.lock")))
+        (if (> (- (current-seconds) when-run) run-delay)
+            (begin
+              (common:simple-file-lock-and-wait lock-file expire-time: 15)
+              (server:run areapath)
+              (thread-sleep! 2) ;; don't release the lock for at least a few seconds
+              (common:simple-file-release-lock lock-file)))
+        (hash-table-set! *server-kind-run* areapath (list (+ call-num 1) (current-seconds))))))
+
+(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
+
+;; Poll for a server for AREAPATH, starting one (via server:kind-run) when no
+;; candidate exists, until one answers or TIMEOUT seconds have passed.
+;;
+;; Returns the server url, or #f if the timeout expired first.
+;;
+(define (server:start-and-wait areapath #!key (timeout 60))
+  (let ((give-up-time (+ (current-seconds) timeout)))
+    (let loop ((server-url (server:check-if-running areapath))
+               (try-num    0))
+      (if (or server-url
+              (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
+          server-url
+          (let* ((candidates (server:get-best (server:get-list areapath)))
+                 ;; server:check-if-running treats a #f result from
+                 ;; server:get-best as "no servers"; do the same here rather
+                 ;; than calling length on #f
+                 (num-ok     (if (list? candidates) (length candidates) 0)))
+            (if (and (> try-num 0) ;; first time through simply wait a little while then try again
+                     (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
+                (server:kind-run areapath))
+            (thread-sleep! 5)
+            (loop (server:check-if-running areapath)
+                  (+ try-num 1)))))))
+
+;;======================================================================
+;; make html output
+;;======================================================================
+
+;; Record LOGF as the top-level log of TEST-NAME in run RUN-ID via the
+;; 'tests:test-set-toplog general call.
+(define (tests:test-set-toplog! run-id test-name logf)
+  (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name))
+
+(define (tests:summarize-items run-id test-id test-name force)
+  ;; if not force then only update the record if one of these is true:
+  ;;   1. logf is "logs/final.log"
+  ;;   2. logf is same as outputfilename
+  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
+         (orig-dir       (current-directory))
+         (logf-info      (rmt:test-get-logfile-info run-id test-name))
+         (logf           (if logf-info (cadr logf-info) #f))
+         (path           (if logf-info (car logf-info) #f)))
+    ;; This query finds the path and changes the directory to it for the test
+    (if (and (string? path)
+             (directory? path)) ;; can get #f here under some weird conditions. why, unknown ...
+        (begin
+          (debug:print 4 *default-log-port* "Found path: " path)
+          (change-directory path))
+        ;; (set! outputfilename (conc path "/" outputfilename)))
+        (debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
+    (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
+    (if (or (equal? logf "logs/final.log")
+            (equal? 
logf outputfilename) + force) + (let ((my-start-time (current-seconds)) + (lockf (conc outputfilename ".lock"))) + (let loop ((have-lock (common:simple-file-lock lockf))) + (if have-lock + (let ((script (configf:lookup *configdat* "testrollup" test-name))) + (print "Obtained lock for " outputfilename) + (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) + (if script + (system (conc script " > " outputfilename " & ")) + (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) + (common:simple-file-release-lock lockf) + (change-directory orig-dir) + ;; NB// tests:test-set-toplog! is remote internal... + (tests:test-set-toplog! run-id test-name outputfilename)) + ;; didn't get the lock, check to see if current update started later than this + ;; update, if so we can exit without doing any work + (if (> my-start-time (handle-exceptions + exn + 0 + (file-modification-time lockf))) + ;; we started since current re-gen in flight, delay a little and try again + (begin + (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") + (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds + (loop (common:simple-file-lock lockf)))))))))) + +(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) + (let ((counts (make-hash-table)) + (statecounts (make-hash-table)) + (outtxt "") + (tot 0) + (testdat (rmt:test-get-records-for-index-file run-id test-name))) + (with-output-to-file outputfilename + (lambda () + (set! outtxt (conc outtxt "Summary: " test-name + "

Summary for " test-name "

")) + (for-each + (lambda (testrecord) + (let ((id (vector-ref testrecord 0)) + (itempath (vector-ref testrecord 1)) + (state (vector-ref testrecord 2)) + (status (vector-ref testrecord 3)) + (run_duration (vector-ref testrecord 4)) + (logf (vector-ref testrecord 5)) + (comment (vector-ref testrecord 6))) + (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) + (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) + (set! outtxt (conc outtxt "" + ;; " " itempath "" + " " itempath "" + "" state "" + "" status "" + "" (if (equal? comment "") + " " + comment) "" + "")))) + (if (list? testdat) + testdat + (begin + (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) + '()))) + + (print "
") + ;; Print out stats for status + (set! tot 0) + (print "") + (for-each (lambda (state) + (set! tot (+ tot (hash-table-ref statecounts state))) + (print "")) + (hash-table-keys statecounts)) + (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") + (print "
") + ;; Print out stats for state + (set! tot 0) + (print "") + (for-each (lambda (status) + (set! tot (+ tot (hash-table-ref counts status))) + (print "")) + (hash-table-keys counts)) + (print "

Status stats

" status + "" (hash-table-ref counts status) "
Total" tot "
") + (print "
") + + (print "" + "" + outtxt "
ItemStateStatusComment
") + ;; (release-dot-lock outputfilename) + ;;(rmt:update-run-stats + ;; run-id + ;; (hash-table-map + ;; state-status-counts + ;; (lambda (key val) + ;; (append key (list val))))) + )))) + +(define tests:css-jscript-block +#< +ul.LinkedList { display: block; } +/* ul.LinkedList ul { display: none; } */ +.HandCursorStyle { cursor: pointer; cursor: hand; } /* For IE */ +th {background-color: #8c8c8c;} +td.test {background-color: #d9dbdd;} +td.PASS {background-color: #347533;} +td.FAIL {background-color: #cc2812;} +td.SKIP{background-color: #FFD733;} +td.WARN {background-color: #EA8724;} +td.WAIVED {background-color: #838A12;} +td.ABORT{background-color: #EA24B7;} +.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;} + + + + + + +EOF +) + +(define tests:css-jscript-block-dynamic +#< +EOF +) + +(define (test:js-block javascript-lib) + (conc "" )) + + +(define tests:css-jscript-block-static (test:js-block *java-script-lib*)) + +(define (tests:css-jscript-block-cond dynamic) + (if (equal? 
dynamic #t) + tests:css-jscript-block-dynamic + tests:css-jscript-block-static)) + + +(define (tests:run-record->test-path run numkeys) + (append (take (vector->list run) numkeys) + (list (vector-ref run (+ 1 numkeys))))) + + +(define (tests:get-rest-data runs header numkeys) + (let ((resh (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-id (db:get-value-by-header run header "id")) + (run-dir (tests:run-record->test-path run numkeys)) + (test-data (rmt:get-tests-for-run + run-id + "%" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f))) + + (map (lambda (test) + (let* ((test-name (vector-ref test 2)) + (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13))) + (test-item (conc test-name ":" (vector-ref test 11))) + (test-status (vector-ref test 4))) + + (if (not (hash-table-ref/default resh test-name #f)) + (hash-table-set! resh test-name (make-hash-table))) + (if (not (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f)) + (hash-table-set! (hash-table-ref/default resh test-name #f) test-item (make-hash-table))) + (hash-table-set! (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f) run-id (list test-status test-html-path)))) + test-data))) + runs) + resh)) + + +;; hash-table tree to html list tree +;; +;; tipfunc takes two parameters: y the tip value and path the path to that point +;; +(define (common:htree->html ht path tipfunc) + (let ((datlist (sort (hash-table->alist ht) + (lambda (a b) + (string< (car a)(car b)))))) + (if (null? datlist) + (tipfunc #f path) ;; really shouldn't get here + (s:ul + (map (lambda (x) + (let* ((levelname (car x)) + (y (cdr x)) + (newpath (append path (list levelname))) + (leaf (or (not (hash-table? y)) + (null? 
(hash-table-keys y))))) + (if leaf + (s:li (tipfunc y newpath)) + (s:li + (list + levelname + (common:htree->html y newpath tipfunc)))))) + datlist))))) + + +;; tests:genrate dashboard body +;; + +(define (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt) + (let* ((start (* page pg-size)) + ;(runsdat (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys))) + (runsdat (rmt:get-runs-by-patt keys run-patt target-patt start pg-size #f 0 sort-order: "desc")) + ; db:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-update + (header (vector-ref runsdat 0)) + (runs (vector-ref runsdat 1)) + (ctr 0) + (test-runs-hash (tests:get-rest-data runs header numkeys)) + (test-list (hash-table-keys test-runs-hash))) + + (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag) + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (get-prev-links page linktree) + (get-next-links page linktree total-runs) + + (s:h1 "Summary for " area-name) + (s:h3 "Filter" ) + (s:input 'type "text" 'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()") + ;; top list + + (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 + (map (lambda (key) + (let* ((res (s:tr 'class "something" + (s:th key ) + (map (lambda (run) + (s:th (vector-ref run ctr))) + runs)))) + (set! 
ctr (+ ctr 1)) + res)) + keys) + (s:tr + (s:th "Run Name") + (map (lambda (run) + (s:th (db:get-value-by-header run header "runname"))) + runs)) + + (map (lambda (test-name) + (let* ((item-hash (hash-table-ref/default test-runs-hash test-name #f)) + (item-keys (sort (hash-table-keys item-hash) string<=?))) + (map (lambda (item-name) + (let* ((res (s:tr 'class item-name + (s:td item-name 'class "test" ) + (map (lambda (run) + (let* ((run-test (hash-table-ref/default item-hash item-name #f)) + (run-id (db:get-value-by-header run header "id")) + (result (hash-table-ref/default run-test run-id "n/a")) + ;(relative-path (get-relative-path)) + (status (if (string? result) + result + (car result))) + (link (if (string? result) + result + (if (equal? flag #t) + (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname=" item-name )) + (s:a (car result) 'href (string-substitute (conc linktree "/") "" (cadr result) "-")))))) + (s:td link 'class status))) + runs)))) + res)) + item-keys))) + test-list)))))) + +;; (tests:create-html-tree "test-index.html") +;; +(define (tests:create-html-tree outf) + (let* ((lockfile (conc outf ".lock")) + (runs-to-process '()) + (linktree (common:get-linktree)) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (run-patt (or (args:get-arg "-run-patt") + (args:get-arg "-runname") + "%")) + (target (or (args:get-arg "-target-patt") + (args:get-arg "-target") + "%")) + (targlist (string-split target "/")) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "%")) + targlist)) + (target-patt (string-join targtweaked "/")) + ;(total-runs (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target + (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) + (pg-size 10)) + (if (common:simple-file-lock lockfile) + (begin + ;(print total-runs) + (let loop ((page 0)) + (let* ((oup (open-output-file (or outf (conc 
linktree "/page" page ".html")))) + (get-prev-links (lambda (page linktree ) + (let* ((link (if (not (eq? page 0)) + (s:a "<<prev" 'href (conc "page" (- page 1) ".html")) + (s:a "" 'href (conc "page" page ".html"))))) + link))) + (get-next-links (lambda (page linktree total-runs) + (let* ((link (if (> total-runs (+ 10 (* page pg-size))) + (s:a "next>>" 'href (conc "page" (+ page 1) ".html")) + (s:a "" 'href (conc "page" page ".html"))))) + link))) ) + (print "total runs: " total-runs) + (s:output-new + oup + (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function + (close-output-port oup) + ; (set! page (+ 1 page)) + (if (> total-runs (* (+ 1 page) pg-size)) + (loop (+ 1 page))))) + (common:simple-file-release-lock lockfile)) + + #f))) + + +(define (tests:readlines filename) + (call-with-input-file filename + (lambda (p) + (let loop ((line (read-line p)) + (result '())) + (if (eof-object? line) + (reverse result) + (loop (read-line p) (cons line result))))))) + +(define (tests:get-test-log run-id test-name item-name) + (let* ((test-data (rmt:get-tests-for-run + (string->number run-id) + test-name ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f)) + (path "") + (found 0)) + (debug:print-info 0 *default-log-port* "found: " found ) + + (let loop ((hed (car test-data)) + (tal (cdr test-data))) + (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13)) + + (if (equal? (vector-ref hed 11) item-name) + (begin + (set! found 1) + (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13))))) + (if (and (not (null? tal)) (equal? found 0)) + (loop (car tal)(cdr tal)))) + (if (equal? path "") + "

Data not found

" + (string-join (tests:readlines path) "\n")))) + + +(define (tests:dynamic-dboard page) +;(define (tests:create-html-tree o) + (let* ( +;(page "1") + (linktree (common:get-linktree)) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (targtweaked (make-list numkeys "%")) + (target-patt (string-join targtweaked "/")) + (total-runs (rmt:get-num-runs "%")) + (pg-size 10) + (pg (if (equal? page #f) + 0 + (- (string->number page) 1))) + (get-prev-links (lambda (pg linktree) + (debug:print-info 0 *default-log-port* "val: " (- 1 pg)) + (let* ((link (if (not (eq? pg 0)) + (s:a "<<prev " 'href (conc "dashboard?page=" pg )) + (s:a "" 'href (conc "dashboard?page=" pg))))) + link))) + (get-next-links (lambda (pg linktree total-runs) + (debug:print-info 0 *default-log-port* "val: " pg) + (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size) + + (let* ((link (if (> total-runs (+ 10 (* pg pg-size))) + (s:a "next>> " 'href (conc "dashboard?page=" (+ pg 2) )) + (s:a "" 'href (conc "dashboard?page=" pg ))))) + link))) + (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function + html-body)) + +(define (tests:create-html-summary outf) + (let* ((lockfile (conc outf ".lock")) + (linktree (common:get-linktree)) + (keys (rmt:get-keys)) + (area-name (common:get-testsuite-name)) + (run-patt (or (args:get-arg "-run-patt") + (args:get-arg "-runname") + "%")) + (target (or (args:get-arg "-target-patt") + (args:get-arg "-target") + "%")) + (targlist (string-split target "/")) + (numkeys (length keys)) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "%")) + targlist)) + (target-patt (string-join targtweaked "/"))) + (if (common:simple-file-lock lockfile) + (begin + (let* (;(runsdat1 (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys))) + 
(runsdat (rmt:get-runs-by-patt keys run-patt target-patt #f #f #f 0)) + (runs (vector-ref runsdat 1)) + (header (vector-ref runsdat 0)) + (oup (open-output-file (or outf (conc linktree "/targets.html")))) + (target-hash (test:create-target-hash runs header (length keys)))) + (test:create-target-html target-hash oup area-name linktree) + (test:create-run-html runs area-name linktree (length keys) header)) + (common:simple-file-release-lock lockfile)) + #f))) + +(define (test:get-test-hash test-data) + (let ((resh (make-hash-table))) + (map (lambda (test) + (let* ((test-name (vector-ref test 2)) + (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html")) + (conc (vector-ref test 10) "/test-summary.html" ) + (conc (vector-ref test 10) "/" (vector-ref test 13)))) + (test-item (vector-ref test 11)) + (test-status (vector-ref test 4))) + (if (not (hash-table-ref/default resh test-item #f)) + (hash-table-set! resh test-item (make-hash-table))) + (hash-table-set! (hash-table-ref/default resh test-item #f) test-name (list test-status test-html-path)))) + test-data) +resh)) + +(define (test:get-data->b-keys ordered-data a-keys) + (delete-duplicates + (sort (apply + append + (map (lambda (sub-key) + (let ((subdat (hash-table-ref ordered-data sub-key))) + (hash-table-keys subdat))) + a-keys)) + string>=?))) + + +(define (test:create-run-html runs area-name linktree numkeys header) + (map (lambda (run) + (let* ((target (string-join (take (vector->list run) numkeys) "/")) + (run-name (db:get-value-by-header run header "runname")) + (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time"))) + (oup (if (file-exists? 
(conc linktree "/" target "/" run-name)) + (open-output-file (conc linktree "/" target "/" run-name "/run.html")) + #f)) + (run-id (db:get-value-by-header run header "id")) + (test-data (rmt:get-tests-for-run + run-id + "%" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f)) + (item-test-hash (test:get-test-hash test-data)) + (items (hash-table-keys item-test-hash)) + (test-names (test:get-data->b-keys item-test-hash items))) + (if oup + (begin + (s:output-new + oup + (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) + (s:title "Runs View " run-name) + (s:body + (s:h1 "Runs View " ) + (s:h3 "Target" target) + (s:p + (s:b "Run name" ) run-name) + (s:p + (s:b "Run Date" ) run-time) + (s:table 'border 1 'cellspacing 0 + (s:tr + (s:th "Items") + (map (lambda (test) + (s:th test)) + test-names)) + (map (lambda (item) + (let* ((test-hash (hash-table-ref/default item-test-hash item #f))) + (if test-hash + (begin + (s:tr + (s:td 'class "test" item) + (map (lambda (test) + (let* ((test-details (hash-table-ref/default test-hash test #f)) + (status (if test-details + (car test-details))) + (link (if test-details + (string-substitute (conc linktree "/" target "/" run-name "/") "" (cadr test-details) "-")))) + (if test-details + (s:td 'class status + (s:a 'class "link" 'href link status )) + (s:td "")))) + test-names)))))) + (sort items string<=?)))))) + (close-output-port oup)) + (debug:print-info 0 "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. 
Megatest will not create run.html")))) +runs)) + +(define (test:create-target-hash runs header numkeys) + (let ((resh (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-name (db:get-value-by-header run header "runname")) + (target (string-join (take (vector->list run) numkeys) "/")) + (run-list (hash-table-ref/default resh target #f))) + + (if (not run-list) + (hash-table-set! resh target (list run-name)) + (hash-table-set! resh target (cons run-name run-list))))) + runs) + resh)) + +(define (test:get-max-run-cnt target-hash targets) + (let* ((cnt 0 )) + (map (lambda (target) + (let* ((runs (hash-table-ref/default target-hash target #f)) + (run-length (if runs + (length runs) + 0))) + + (if (< cnt run-length) + (set! cnt run-length)))) + targets) +cnt)) + +(define (test:pad-runs target-hash targets max-row-length) + (map (lambda (target) + (let loop ((run-list (hash-table-ref/default target-hash target #f))) + (if (< (length run-list) max-row-length) + (begin + (hash-table-set! target-hash target (cons "" run-list)) + (loop (hash-table-ref/default target-hash target #f) ))))) + targets) + target-hash) + +(define (test:create-target-html target-hash oup area-name linktree) + (let* ((targets (hash-table-keys target-hash)) + (max-row-length (test:get-max-run-cnt target-hash targets)) + (pad-runs-hash (test:pad-runs target-hash targets max-row-length))) + (s:output-new + oup + (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) + + (s:title "Target View " area-name) + (s:body + (s:h1 "Target View " area-name) + (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 + (s:tr 'class "something" + (s:th "Target") + (s:th 'colspan max-row-length "Runs")) + (let* ((tbl (map (lambda (target) + (s:tr + (s:td 'class "test" target) + (let* ((runs (hash-table-ref/default target-hash target #f)) + (rest-row (map (lambda (run) + (if (equal? 
run "") + (s:td run) + (if (file-exists?(conc linktree "/" target "/" run )) + (begin + (s:td + (s:a 'href (conc target "/" run "/run.html") run)))))) + (reverse runs)))) + rest-row))) + targets))) + tbl))))) + (close-output-port oup))) + + +(define (tests:create-html-tree-old outf) + (let* ((lockfile (conc outf ".lock")) + (runs-to-process '())) + (if (common:simple-file-lock lockfile) + (let* ((linktree (common:get-linktree)) + (oup (open-output-file (or outf (conc linktree "/runs-index.html")))) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (runsdat (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys))) + (header (vector-ref runsdat 0)) + (runs (vector-ref runsdat 1)) + (runtreedat (map (lambda (x) + (tests:run-record->test-path x numkeys)) + runs)) + (runs-htree (common:list->htree runtreedat))) + (set! runs-to-process runs) + (s:output-new + oup + (s:html tests:css-jscript-block + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (s:h1 "Summary for " area-name) + ;; top list + (s:ul 'id "LinkedList1" 'class "LinkedList" + (s:li + "Runs" + (common:htree->html runs-htree + '() + (lambda (x p) + (let* ((targ-path (string-intersperse p "/")) + (full-path (conc linktree "/" targ-path)) + (run-name (car (reverse p)))) + (if (and (common:file-exists? full-path) + (directory? full-path) + (file-write-access? 
full-path)) + (s:a run-name 'href (conc targ-path "/run-summary.html")) + (begin + (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html") + (conc run-name " (Not able to create summary at " targ-path ")"))))))))))) + (close-output-port oup) + (common:simple-file-release-lock lockfile) + + (for-each + (lambda (run) + (let* ((test-subpath (tests:run-record->test-path run numkeys)) + (run-id (db:get-value-by-header run header "id")) + (run-dir (tests:run-record->test-path run numkeys)) + (test-dats (rmt:get-tests-for-run + run-id + "%/" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f)) + (tests-tree-dat (map (lambda (test-dat) + ;; (tests:run-record->test-path x numkeys)) + (let* ((test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (full-name (db:test-make-full-name test-name item-path)) + (path-parts (string-split full-name))) + path-parts)) + test-dats)) + (tests-htree (common:list->htree tests-tree-dat)) + (html-dir (conc linktree "/" (string-intersperse run-dir "/"))) + (html-path (conc html-dir "/run-summary.html")) + (oup (if (and (common:file-exists? html-dir) + (directory? html-dir) + (file-write-access? 
html-dir)) + (open-output-file html-path) + #f))) + ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat) + (if oup + (begin + (s:output-new + oup + (s:html tests:css-jscript-block + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (s:h1 "Summary for " (string-intersperse run-dir "/")) + ;; top list + (s:ul 'id "LinkedList1" 'class "LinkedList" + (s:li + "Tests" + (common:htree->html tests-htree + '() + (lambda (x p) + (let* ((targ-path (string-intersperse p "/")) + (test-name (car p)) + (item-path ;; (if (> (length p) 2) ;; test-name + run-name + (string-intersperse p "/")) + (full-targ (conc html-dir "/" targ-path)) + (std-file (conc full-targ "/test-summary.html")) + (alt-file (conc full-targ "/megatest-rollup-" test-name ".html")) + (html-file (if (common:file-exists? alt-file) + alt-file + std-file)) + (run-name (car (reverse p)))) + (if (and (not (common:file-exists? full-targ)) + (directory? full-targ) + (file-write-access? full-targ)) + (tests:summarize-test + run-id + (rmt:get-test-id run-id test-name item-path))) + (if (common:file-exists? full-targ) + (s:a run-name 'href html-file) + (begin + (debug:print 0 *default-log-port* "ERROR: can't access " full-targ) + (conc "No summary for " run-name))))) + )))))) + (close-output-port oup))))) + runs) + #t) + #f))) + + + + +(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) + (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) + (if (and cpuload diskfree) + (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) + (if minutes + (rmt:general-call 'update-run-duration run-id minutes test-id)) + (if (and uname hostname) + (rmt:general-call 'update-uname-host run-id uname hostname test-id))) + +;; This one is for running with no db access (i.e. 
via rmt: internally) +(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) +;; (define (tests:set-full-meta-info test-id run-id minutes work-area) +;; (let ((remtries 10)) + (let* ((cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (uname (get-uname "-srvpio")) + (hostname (get-host-name))) + (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) + +;; +;; +(define (tests:get-compressed-steps run-id test-id) + (let* ((steps-data (rmt:get-steps-for-test run-id test-id)) ;; 0 1 2 3 4 5 6 7 + (comprsteps (tests:process-steps-table steps-data))) ;; # + (map (lambda (x) + ;; take advantage of the \n on time->string + (vector ;; we are constructing basically the original vector but collapsing start end records + (vector-ref x 0) ;; id 0 + (let ((s (vector-ref x 1))) + (if (number? s)(seconds->time-string s) s)) ;; starttime 1 + (let ((s (vector-ref x 2))) + (if (number? s)(seconds->time-string s) s)) ;; endtime 2 + (vector-ref x 3) ;; status 3 + (vector-ref x 4) ;; duration 4 + (vector-ref x 5) ;; logfile 5 + (vector-ref x 6) ;; comment 6 + (vector-ref x 7))) ;; id 7 + (sort (hash-table-values comprsteps) + (lambda (a b) + (let ((time-a (vector-ref a 1)) + (time-b (vector-ref b 1)) + (id-a (vector-ref a 7)) + (id-b (vector-ref b 7))) + (if (and (number? time-a)(number? time-b)) + (if (< time-a time-b) + #t + (if (eq? 
time-a time-b) + (< id-a id-b) + ;; (stringwork-week/day-time + (db:test-get-event_time test-dat))) + (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) + (s:h3 "Log files") + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) + (map (lambda (step-dat) + (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) + (s:td (tdb:steps-table-get-start step-dat)) + (s:td (tdb:steps-table-get-end step-dat)) + (s:td (tdb:steps-table-get-status step-dat)) + (s:td (tdb:steps-table-get-runtime step-dat)) + (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) + (s:a 'href step-log step-log))))) + steps-dat)) + ))) + (close-output-port oup))))) + + +;; MUST BE CALLED local! +;; +(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '())) + ;; BUG: Move the values derived from args to parameters and push to megatest.scm + (let* ((testpatt (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) + (statepatt (or (args:get-arg "-state") (args:get-arg ":state") "%")) + (statuspatt (or (args:get-arg "-status") (args:get-arg ":status") "%")) + (runname (or (args:get-arg "-runname") (args:get-arg ":runname") "%")) + (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res + testpatt + statepatt + statuspatt + runname))) + (if fnamepatt + (apply append + (map (lambda (p) + (if (directory-exists? p) + (let ((glob-query (conc p "/" fnamepatt))) + (handle-exceptions + exn + (with-input-from-pipe + (conc "echo " glob-query) + read-lines) ;; we aren't going to try too hard. 
If glob breaks it is likely because someone tried to do */*/*.log or similar + (glob glob-query))) + '())) + paths-from-db)) + paths-from-db))) + + +;; for each test: +;; +(define (tests:filter-non-runnable run-id testkeynames testrecordshash) + (let ((runnables '())) + (for-each + (lambda (testkeyname) + (let* ((test-record (hash-table-ref testrecordshash testkeyname)) + (test-name (tests:testqueue-get-testname test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) + (item-path (tests:testqueue-get-item_path test-record)) + (waitons (tests:testqueue-get-waitons test-record)) + (keep-test #t) + (test-id (rmt:get-test-id run-id test-name item-path)) + (tdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) + (if tdat + (begin + ;; Look at the test state and status + (if (or (and (member (db:test-get-status tdat) + '("PASS" "WARN" "WAIVED" "CHECK" "SKIP")) + (equal? (db:test-get-state tdat) "COMPLETED")) + (member (db:test-get-state tdat) + '("INCOMPLETE" "KILLED"))) + (set! keep-test #f)) + + ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test + ;; from the runnable list + (if keep-test + (for-each (lambda (waiton) + ;; for now we are waiting only on the parent test + (let* ((parent-test-id (rmt:get-test-id run-id waiton "")) + (wtdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) + (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED") + (member (db:test-get-status wtdat) '("FAIL" "ABORT"))) + (member (db:test-get-status wtdat) '("KILLED")) + (member (db:test-get-state wtdat) '("INCOMPETE"))) + ;; (if (or (member (db:test-get-status wtdat) + ;; '("FAIL" "KILLED")) + ;; (member (db:test-get-state wtdat) + ;; '("INCOMPETE"))) + (set! keep-test #f)))) ;; no point in running this one again + waitons)))) + (if keep-test (set! 
runnables (cons testkeyname runnables))))) + testkeynames) + runnables)) + +;;====================================================================== +;; test steps +;;====================================================================== + +;; teststep-set-status! used to be here + +(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) + (let* ((testdat (rmt:get-test-info-by-id run-id test-id))) + (and testdat + (equal? (test:get-state testdat) "KILLREQ")))) + +(define (test:tdb-get-rundat-count tdb) + (if tdb + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + tdb + "SELECT count(id) FROM test_rundat;") + res)) + 0) + +;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) +#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) + (let* ((cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (remtries 10)) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") + (set! remtries (- remtries 1)) + (thread-sleep! 10) + (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. 
Giving up") + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)))) + (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) + ))) + ) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -60,11 +60,11 @@ ;;====================================================================== ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; -(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. +#;(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. (let* ((runremote (or area-dat *runremote*)) (cinfo (if (remote? runremote) (remote-conndat runremote) #f))) (if cinfo @@ -210,11 +210,11 @@ ;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) ;; bunch of small functions factored out of send-receive to make debug easier ;; -(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) +#;(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) ;; (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") ;; (mutex-lock! 
*rmt-mutex*) (let* ((conninfo (remote-conndat runremote)) (dat (case (remote-transport runremote) @@ -224,25 +224,25 @@ ;; is needed to deal with ;; attemtped ;; communication to ;; servers that have gone ;; away - (http-transport:client-api-send-receive 0 conninfo cmd params) + #;(http-transport:client-api-send-receive 0 conninfo cmd params) ((commfail)(vector #f "communications fail")) ((exn)(vector #f "other fail" (print-call-chain))))) (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") (exit)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time + #t #;(http-transport:server-dat-update-last-access conninfo) ;; refresh access time (begin (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) (set! conninfo #f) (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. - (http-transport:close-connections area-dat: runremote))) + #;(http-transport:close-connections area-dat: runremote))) (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) (mutex-unlock! *rmt-mutex*) (if success ;; success only tells us that the transport was ;; successful, have to examine the data to see if ;; there was a detected issue at the other end @@ -352,11 +352,11 @@ (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res (handle-exceptions exn #f - (http-transport:client-api-send-receive run-id connection-info cmd params)))) + #;(http-transport:client-api-send-receive run-id connection-info cmd params)))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! 
CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) ;; ;; Wrap json library for strings (why the ports crap in the first place?) @@ -934,11 +934,11 @@ (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (mutex-lock! *rmt-mutex*) (remote-conndat-set! runremote #f) - (http-transport:close-connections area-dat: runremote) + #;(http-transport:close-connections area-dat: runremote) (remote-server-url-set! runremote #f) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) @@ -962,14 +962,14 @@ ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") (mutex-lock! *rmt-mutex*) - (http-transport:close-connections area-dat: runremote) + #;(http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! 
wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res ;; (include "common_records.scm") ) Index: servermod.scm ================================================================== --- servermod.scm +++ servermod.scm @@ -19,18 +19,27 @@ ;;====================================================================== (declare (unit servermod)) (declare (uses commonmod)) (declare (uses dbmod)) +(declare (uses mtconfigf)) +(declare (uses mtargs)) +(declare (uses tasksmod)) (module servermod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(import scheme chicken data-structures extras files) +(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 + srfi-69 format ports srfi-1 matchable + directory-utils md5 message-digest regex + stack) (import commonmod) (import dbmod) +(import tasksmod) +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) ;; (use (prefix ulex ulex:)) (include "common_records.scm") @@ -314,53 +323,19 @@ servr)) (if (and host port) (conc host ":" port) #f))) - ;; timeout is hms string: 1h 5m 3s, default is 1 minute ;; (define (server:expiration-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below (* 3600 (string->number tmo)) 60))) -;; ping the given server -;; -(define (server:check-server server-record) - (let* ((server-url (server:record->url server-record)) - (res (case *transport-type* - ((http)(server:ping server-url)) - ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) - ))) - (if res - server-url - #f))) - -;; no longer care if multiple servers are started by accident. older servers will drop off in time. 
-;; -(define (server:check-if-running areapath) ;; #!key (numservers "2")) - (let* ((ns (server:get-num-servers)) - (servers (server:get-best (server:get-list areapath)))) - ;; (print "servers: " servers " ns: " ns) - (if (or (and servers - (null? servers)) - (not servers) - (and (list? servers) - (< (length servers) (random ns)))) ;; somewhere between 0 and numservers - #f - (let loop ((hed (car servers)) - (tal (cdr servers))) - (let ((res (server:check-server hed))) - (if res - res - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - ;;====================================================================== ;; P K T S S T U F F ;;====================================================================== ;; ??? @@ -376,21 +351,10 @@ ;;====================================================================== ;; Call this to start the actual server ;; -;; all routes though here end in exit ... -;; -;; start_server -;; -(define (server:launch run-id transport-type) - (case transport-type - ((http)(http-transport:launch)) - ;;((nmsg)(nmsg-transport:launch run-id)) - ;;((rpc) (rpc-transport:launch run-id)) - (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) - ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;; Get the transport @@ -425,176 +389,22 @@ ((fs) result) (else (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) result))) -;; Given a run id start a server process ### NOTE ### > file 2>&1 -;; if the run-id is zero and the target-host is set -;; try running on that host -;; incidental: rotate logs in logs/ dir. -;; -(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((curr-host (get-host-name)) - ;; (attempt-in-progress (server:start-attempted? 
areapath)) - ;; (dot-server-url (server:check-if-running areapath)) - (curr-ip (server:get-best-guess-address curr-host)) - (curr-pid (current-process-id)) - (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - (target-host (car homehost)) - (testsuite (common:get-testsuite-name)) - (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) - (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") - ;; " -log " logfile - " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) - (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) - ;; we want the remote server to start in *toppath* so push there - (push-directory areapath) - (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") - (thread-start! log-rotate) - - ;; host.domain.tld match host? - (if (and target-host - ;; look at target host, is it host.domain.tld or ip address and does it - ;; match current ip or hostname - (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - (not (equal? curr-ip target-host))) - (begin - (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - (setenv "TARGETHOST" target-host))) - - (setenv "TARGETHOST_LOGF" logfile) - (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. 
It seems pretty common that many running tests request a server at the same time - (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever - (system (conc "nbfake " cmdln)) - (unsetenv "TARGETHOST_LOGF") - (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - (thread-join! log-rotate) - (pop-directory))) - (define (server:get-client-signature) ;; (if *my-client-signature* *my-client-signature* ;; (let ((sig (server:mk-signature)) ;; ) ;; (set! *my-client-signature* sig) ;; *my-client-signature*))) -;; kind start up of servers, wait 40 seconds before allowing another server for a given -;; run-id to be launched -(define (server:kind-run areapath) - (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? - (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun - (call-num (car last-run-dat)) - (when-run (cadr last-run-dat)) - (run-delay (+ (case call-num - ((0) 0) - ((1) 20) - ((2) 300) - (else 600)) - (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously - (lock-file (conc areapath "/logs/server-start.lock"))) - (if (> (- (current-seconds) when-run) run-delay) - (begin - (common:simple-file-lock-and-wait lock-file expire-time: 15) - (server:run areapath) - (thread-sleep! 2) ;; don't release the lock for at least a few seconds - (common:simple-file-release-lock lock-file))) - (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) - -(define (server:start-and-wait areapath #!key (timeout 60)) - (let ((give-up-time (+ (current-seconds) timeout))) - (let loop ((server-url (server:check-if-running areapath)) - (try-num 0)) - (if (or server-url - (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. 
- server-url - (let ((num-ok (length (server:get-best (server:get-list areapath))))) - (if (and (> try-num 0) ;; first time through simply wait a little while then try again - (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one - (server:kind-run areapath)) - (thread-sleep! 5) - (loop (server:check-if-running areapath) - (+ try-num 1))))))) - -(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. - (define (server:kill servr) (match-let (((mod-time hostname port start-time pid) servr)) (tasks:kill-server hostname pid))) -;; called in megatest.scm, host-port is string hostname:port -;; -;; NOTE: This is NOT called directly from clients as not all transports support a client running -;; in the same process as the server. -;; -(define (server:ping host-port-in #!key (do-exit #f)) - (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - #f ;; (server:check-if-running *toppath*) - ;; (if (number? host-port-in) ;; we were handed a server-id - ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; ;; (print "srec: " srec " host-port-in: " host-port-in) - ;; (if srec - ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - ;; (conc "no such server-id " host-port-in))) - host-port-in))) ;; ) - (let* ((host-port (if host:port - (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f)) - #f))) -;; (toppath (launch:setup))) - ;; (print "host-port=" host-port) - (if (not host-port) - (begin - (if host-port-in - (debug:print 0 *default-log-port* "ERROR: bad host:port")) - (if do-exit (exit 1)) - #f) - (let* ((iface (car host-port)) - (port (cadr host-port)) - (server-dat (http-transport:client-connect iface port)) - (login-res (rmt:login-no-auto-client-setup server-dat))) - (if (and (list? 
login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f))))))) - -;; run ping in separate process, safest way in some cases -;; -(define (server:ping-server ifaceport) - (with-input-from-pipe - (conc (common:get-megatest-exe) " -ping " ifaceport) - (lambda () - (let loop ((inl (read-line)) - (res "NOREPLY")) - (if (eof-object? inl) - (case (string->symbol res) - ((NOREPLY) #f) - ((LOGIN_OK) #t) - (else #f)) - (loop (read-line) inl)))))) - -;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). -;; -(define (server:login toppath) - (lambda (toppath) - (set! *db-last-access* (current-seconds)) ;; might not be needed. - (if (equal? *toppath* toppath) - #t - #f))) - ;; (define server:sync-lock-token "SERVER_SYNC_LOCK") ;; (define (server:release-sync-lock) ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) ;; (define (server:have-sync-lock?) ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) Index: subrun-inc.scm ================================================================== --- subrun-inc.scm +++ subrun-inc.scm @@ -14,231 +14,5 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . -;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') - -(define (subrun:subrun-test-initialized? test-run-dir) - (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) - (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) - #t - #f)) - -(define (subrun:launch-dashboard test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((subarea (subrun:get-runarea test-run-dir))) - (if (and subarea (common:file-exists? subarea)) - (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) - -(define (subrun:subrun-removed? 
test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (common:file-exists? flagfile) - #t - #f)) - #t)) - -(define (subrun:set-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile))) - (with-output-to-file flagfile - (lambda () (print (current-seconds))))))) - -(define (subrun:unset-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile)) - (delete-file flagfile)))) - - -(define (subrun:testconfig-defines-subrun? testconfig) - (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested - -(define (subrun:initialize-toprun-test testconfig test-run-dir) - (let ((ra (configf:lookup testconfig "subrun" "run-area")) - (logpro (configf:lookup testconfig "subrun" "logpro")) - (symlink-target (conc test-run-dir "/subrun-area")) - ) - (if (not ra) ;; when runarea is not set we default to *toppath*. However - (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun")))) - ;; we need to force the setting in the testconfig so it will - ;; be preserved in the testconfig.subrun file - (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area) - (set! ra fallback-run-area))) - (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun - (if (common:file-exists? symlink-target) - (delete-file symlink-target)) - (create-symbolic-link ra symlink-target) - (configf:write-alist testconfig "testconfig.subrun"))) - -(define (subrun:set-state-status test-run-dir state status new-state-status) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? 
test-run-dir)) - (let* ((action-switches-str - (conc "-set-state-status "new-state-status - (if state (conc " -state "state) "") - (if status (conc " -status "status) ""))) - (log-prefix - (subrun:sanitize-path - (conc "set-state-status="new-state-status - (if state (conc ":state="state) "") - (if status (conc "+status="status) "")))) - (submt-result - (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix))) - submt-result))) - -(define (subrun:remove-subrun test-run-dir keep-records ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-remove-runs" - (if keep-records "-keep-records " "") - )) - (remove-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "remove"))) - (if remove-result - (begin - (subrun:set-subrun-removed test-run-dir) - #t) - #f)) - #t)) - -(define (subrun:kill-subrun test-run-dir ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-kill-runs" )) - (kill-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) - kill-result) - #t)) - -(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work - (if (subrun:subrun-removed? test-run-dir) - (subrun:unset-subrun-removed test-run-dir)) - - (let* ((log-prefix "run") - (switches (subrun:selector+log-switches test-run-dir log-prefix)) - (run-wait #t) - (cmd (conc "megatest " sub-cmd " " switches" " - (if run-wait "-run-wait " "")))) - cmd)) - - -(define (subrun:sanitize-path inpath) - (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]"))) - (regex#string-substitute insane-pattern "_" inpath #t))) - -(define (subrun:get-runarea test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((info-alist (subrun:selector+log-alist - test-run-dir - "foo")) - (run-area (if (list? 
info-alist) - (alist-ref "-start-dir" info-alist equal? #f) - #f))) - run-area) - #f)) - -(define (subrun:selector+log-alist test-run-dir log-prefix) - (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) - (subrunfile (conc test-run-dir "/testconfig.subrun" )) - (subrundata (with-input-from-file subrunfile read)) - (subrunconfig (configf:alist->config subrundata)) - (run-area (configf:lookup subrunconfig "subrun" "run-area")) - (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf - (get-environment-variable "MT_RUN_AREA_HOME") - "/no/rundir/found")) - ("run-name" . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME")) - ("target" . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET")))) - (switch-alist-pre (filter-map (lambda (item) - (let* ((config-key (car item)) - (switch (cdr item)) - (defval (alist-ref config-key defvals equal? #f)) - (val (or (configf:lookup subrunconfig "subrun" config-key) - defval))) - (if val - (cons switch val) - #f))) - switch-def-alist)) - - ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null - (mode-patt (alist-ref "-modepatt" switch-alist-pre equal? #f)) - (tag-expr (alist-ref "-tagexpr" switch-alist-pre equal? #f)) - (testpatt (alist-ref "-testpatt" switch-alist-pre equal? - (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not - ;; otherwise specified - - ;; define compact-stem for logfile - (target (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref - (runname (alist-ref "-runname" switch-alist-pre equal? 
#f)) - - - (compact-stem (subrun:sanitize-path - (conc - target - "-" - runname - "-" (or testpatt mode-patt tag-expr "NO-TESTPATT")))) - (logfile (conc - test-run-dir "/" - (if log-prefix - (conc (subrun:sanitize-path log-prefix) "-") - "") - compact-stem - ".log")) - ;; swap out testpatt with modified test-patt and add -log - (switch-alist (cons - (cons "-log" logfile) - (map (lambda (item) - (if (equal? (car item) "-testpatt") - (cons "-testpatt" testpatt) - item)) - switch-alist-pre)))) - switch-alist)) - ;; note - get precmd from subrun section - ;; apply to submegatest commands - -(define (subrun:get-log-path test-run-dir log-prefix) - (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res (alist-ref "-log" alist equal? #f))) - res)) - -(define (subrun:selector+log-switches test-run-dir log-prefix) - (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res - (string-intersperse - (apply - append - (map - (lambda (x) - (list (car x) (cdr x))) - switch-alist)) - " "))) - res)) - -(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) - (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) - (cmd (conc "megatest " selector-switches " " action-switches-str )) - (pid #f) - (proc (lambda () - (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) - ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) - (lambda () - (common:without-vars proc "^MT_.*"))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1))) - (begin - (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) - (if (eq? 
0 exit-code) - (begin - #t) - (begin - #f)))))))) - - - -;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") Index: subrunmod.scm ================================================================== --- subrunmod.scm +++ subrunmod.scm @@ -18,18 +18,249 @@ ;;====================================================================== (declare (unit subrunmod)) (declare (uses commonmod)) +(declare (uses mtconfigf)) (module subrunmod * (import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) -(import commonmod) +(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 + srfi-69 format ports srfi-1 matchable + call-with-environment-variables) +(import + commonmod + (prefix mtconfigf configf:)) ;; (use (prefix ulex ulex:)) (include "common_records.scm") +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + +(define (subrun:subrun-test-initialized? test-run-dir) + (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) + (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) + #t + #f)) + +(define (subrun:launch-dashboard test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let* ((subarea (subrun:get-runarea test-run-dir))) + (if (and subarea (common:file-exists? subarea)) + (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) + +(define (subrun:subrun-removed? test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (common:file-exists? flagfile) + #t + #f)) + #t)) + +(define (subrun:set-subrun-removed test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? 
flagfile))) + (with-output-to-file flagfile + (lambda () (print (current-seconds))))))) + +(define (subrun:unset-subrun-removed test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile)) + (delete-file flagfile)))) + + +(define (subrun:testconfig-defines-subrun? testconfig) + (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested + +(define (subrun:initialize-toprun-test testconfig test-run-dir) + (let ((ra (configf:lookup testconfig "subrun" "run-area")) + (logpro (configf:lookup testconfig "subrun" "logpro")) + (symlink-target (conc test-run-dir "/subrun-area")) + ) + (if (not ra) ;; when runarea is not set we default to *toppath*. However + (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun")))) + ;; we need to force the setting in the testconfig so it will + ;; be preserved in the testconfig.subrun file + (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area) + (set! ra fallback-run-area))) + (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun + (if (common:file-exists? symlink-target) + (delete-file symlink-target)) + (create-symbolic-link ra symlink-target) + (configf:write-alist testconfig "testconfig.subrun"))) + +(define (subrun:set-state-status test-run-dir state status new-state-status) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? 
test-run-dir)) + (let* ((action-switches-str + (conc "-set-state-status "new-state-status + (if state (conc " -state "state) "") + (if status (conc " -status "status) ""))) + (log-prefix + (subrun:sanitize-path + (conc "set-state-status="new-state-status + (if state (conc ":state="state) "") + (if status (conc "+status="status) "")))) + (submt-result + (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix))) + submt-result))) + +(define (subrun:remove-subrun test-run-dir keep-records ) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-remove-runs" + (if keep-records "-keep-records " "") + )) + (remove-result + (subrun:exec-sub-megatest test-run-dir action-switches-str "remove"))) + (if remove-result + (begin + (subrun:set-subrun-removed test-run-dir) + #t) + #f)) + #t)) + +(define (subrun:kill-subrun test-run-dir ) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-kill-runs" )) + (kill-result + (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) + kill-result) + #t)) + +(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work + (if (subrun:subrun-removed? test-run-dir) + (subrun:unset-subrun-removed test-run-dir)) + + (let* ((log-prefix "run") + (switches (subrun:selector+log-switches test-run-dir log-prefix)) + (run-wait #t) + (cmd (conc "megatest " sub-cmd " " switches" " + (if run-wait "-run-wait " "")))) + cmd)) + + +(define (subrun:sanitize-path inpath) + (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]"))) + (regex#string-substitute insane-pattern "_" inpath #t))) + +(define (subrun:get-runarea test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let* ((info-alist (subrun:selector+log-alist + test-run-dir + "foo")) + (run-area (if (list? 
info-alist) + (alist-ref "-start-dir" info-alist equal? #f) + #f))) + run-area) + #f)) + +(define (subrun:selector+log-alist test-run-dir log-prefix) + (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) + (subrunfile (conc test-run-dir "/testconfig.subrun" )) + (subrundata (with-input-from-file subrunfile read)) + (subrunconfig (configf:alist->config subrundata)) + (run-area (configf:lookup subrunconfig "subrun" "run-area")) + (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf + (get-environment-variable "MT_RUN_AREA_HOME") + "/no/rundir/found")) + ("run-name" . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME")) + ("target" . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET")))) + (switch-alist-pre (filter-map (lambda (item) + (let* ((config-key (car item)) + (switch (cdr item)) + (defval (alist-ref config-key defvals equal? #f)) + (val (or (configf:lookup subrunconfig "subrun" config-key) + defval))) + (if val + (cons switch val) + #f))) + switch-def-alist)) + + ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null + (mode-patt (alist-ref "-modepatt" switch-alist-pre equal? #f)) + (tag-expr (alist-ref "-tagexpr" switch-alist-pre equal? #f)) + (testpatt (alist-ref "-testpatt" switch-alist-pre equal? + (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not + ;; otherwise specified + + ;; define compact-stem for logfile + (target (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref + (runname (alist-ref "-runname" switch-alist-pre equal? 
#f)) + + + (compact-stem (subrun:sanitize-path + (conc + target + "-" + runname + "-" (or testpatt mode-patt tag-expr "NO-TESTPATT")))) + (logfile (conc + test-run-dir "/" + (if log-prefix + (conc (subrun:sanitize-path log-prefix) "-") + "") + compact-stem + ".log")) + ;; swap out testpatt with modified test-patt and add -log + (switch-alist (cons + (cons "-log" logfile) + (map (lambda (item) + (if (equal? (car item) "-testpatt") + (cons "-testpatt" testpatt) + item)) + switch-alist-pre)))) + switch-alist)) + ;; note - get precmd from subrun section + ;; apply to submegatest commands + +(define (subrun:get-log-path test-run-dir log-prefix) + (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) + (res (alist-ref "-log" alist equal? #f))) + res)) + +(define (subrun:selector+log-switches test-run-dir log-prefix) + (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) + (res + (string-intersperse + (apply + append + (map + (lambda (x) + (list (car x) (cdr x))) + switch-alist)) + " "))) + res)) + +(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) + (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) + (cmd (conc "megatest " selector-switches " " action-switches-str )) + (pid #f) + (proc (lambda () + (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) + ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) + (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () + (common:without-vars proc "^MT_.*"))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))) + (begin + (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) + (if (eq? 
0 exit-code) + (begin + #t) + (begin + #f)))))))) + + + +;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") ) Index: tests-inc.scm ================================================================== --- tests-inc.scm +++ tests-inc.scm @@ -16,1742 +16,5 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== -;;====================================================================== -;; Tests -;;====================================================================== - -;; return items given config -;; -(define (tests:get-items tconfig) - (let ((items (hash-table-ref/default tconfig "items" #f)) ;; items 4 - (itemstable (hash-table-ref/default tconfig "itemstable" #f))) - ;; if either items or items table is a proc return it so test running - ;; process can know to call items:get-items-from-config - ;; if either is a list and none is a proc go ahead and call get-items - ;; otherwise return #f - this is not an iterated test - (cond - ((procedure? items) - (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") - items) ;; calc later - ((procedure? itemstable) - (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") - itemstable) ;; calc later - ((filter (lambda (x) - (let ((val (car x))) - (if (procedure? val) val #f))) - (append (if (list? items) items '()) - (if (list? itemstable) itemstable '()))) - 'have-procedure) - ((or (list? items)(list? 
itemstable)) ;; calc now - (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" - " items: " items " itemstable: " itemstable) - (items:get-items-from-config tconfig)) - (else #f)))) ;; not iterated - - -;; returns waitons waitors tconfigdat -;; -(define (tests:get-waitons test-name all-tests-registry) - (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t) - (let ((instr (if config - (configf:lookup config "requirements" "waiton") - (begin ;; No config means this is a non-existant test - (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") - (exit 1)))) - (instr2 (if config - (configf:lookup config "requirements" "waitor") - ""))) - (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2) - (let ((newwaitons - (string-split (cond - ((procedure? instr) ;; here - (let ((res (instr))) - (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name) - res)) - ((string? instr) instr) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) - "")))) - (newwaitors - (string-split (cond - ((procedure? instr2) - (let ((res (instr2))) - (debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name) - res)) - ((string? instr2) instr2) - (else - ;; NOTE: This is actually the case of *no* waitons! 
;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) - ""))))) - (values - ;; the waitons - (filter (lambda (x) - (if (hash-table-ref/default all-tests-registry x #f) - #t - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) - #f))) - newwaitons) - (filter (lambda (x) - (if (hash-table-ref/default all-tests-registry x #f) - #t - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) - #f))) - newwaitors) - config))))) - -;; Check for waiver eligibility -;; -(define (tests:check-waiver-eligibility testdat prev-testdat) - (let* ((test-registry (make-hash-table)) - (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) - (test-rundir ;; (sdb:qry 'passstr - (db:test-get-rundir testdat)) ;; ) - (prev-rundir ;; (sdb:qry 'passstr - (db:test-get-rundir prev-testdat)) ;; ) - (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) - (waiver-rx (regexp "^(\\S+)\\s+(.*)$")) - (diff-rule "diff %file1% %file2%") - (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html")) - (if (not (common:file-exists? test-rundir)) - (begin - (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver") - #f) - (begin - (push-directory test-rundir) - (let ((result (if (null? waivers) - #f - (let loop ((hed (car waivers)) - (tal (cdr waivers))) - (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"") - (let* ((waiver (configf:lookup testconfig "waivers" hed)) - (wparts (if waiver (string-match waiver-rx waiver) #f)) - (waiver-rule (if wparts (cadr wparts) #f)) - (waiver-glob (if wparts (caddr wparts) #f)) - (logpro-file (if waiver - (let ((fname (conc hed ".logpro"))) - (if (common:file-exists? 
fname) - fname - (begin - (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff") - #f))) - #f)) - ;; if rule by name of waiver-rule is found in testconfig - use it - ;; else if waivername.logpro exists use logpro-rule - ;; else default to diff-rule - (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule))) - (if rule - rule - (if logpro-file - logpro-rule - (begin - (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule") - diff-rule))))) - ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t) - (processed-cmd (string-substitute - "%file1%" (conc test-rundir "/" waiver-glob) - (string-substitute - "%file2%" (conc prev-rundir "/" waiver-glob) - (string-substitute - "%waivername%" hed rule-string #t) #t) #t)) - (res #f)) - (debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"") - (if (eq? (system processed-cmd) 0) - (if (null? tal) - #t - (loop (car tal)(cdr tal))) - #f)))))) - (pop-directory) - result))))) - -;; Do not rpc this one, do the underlying calls!!! -(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f)) - (let* ((real-status status) - (otherdat (if dat dat (make-hash-table))) - (testdat (rmt:get-test-info-by-id run-id test-id)) - (test-name (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - ;; before proceeding we must find out if the previous test (where all keys matched except runname) - ;; was WAIVED if this test is FAIL - - ;; NOTES: - ;; 1. Is the call to test:get-previous-run-record remotified? - ;; 2. Add test for testconfig waiver propagation control here - ;; - (prev-test (if (equal? 
status "FAIL") - (rmt:get-previous-test-run-record run-id test-name item-path) - #f)) - (waived (if prev-test - (if prev-test ;; true if we found a previous test in this run series - (let ((prev-status (db:test-get-status prev-test)) - (prev-state (db:test-get-state prev-test)) - (prev-comment (db:test-get-comment prev-test))) - (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment) - (if (and (equal? prev-state "COMPLETED") - (equal? prev-status "WAIVED")) - (if comment - comment - prev-comment) ;; waived is either the comment or #f - #f)) - #f) - #f))) - (if (and waived - (tests:check-waiver-eligibility testdat prev-test)) - (set! real-status "WAIVED")) - - (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) - - ;; update the primary record IF state AND status are defined - (if (and state status) - (begin - (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)) - ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status - )) - - ;; if status is "AUTO" then call rollup (note, this one modifies data in test - ;; run area, it does remote calls under the hood. - ;; (if (and test-id state status (equal? status "AUTO")) - ;; (rmt:test-data-rollup run-id test-id status)) - - ;; add metadata (need to do this way to avoid SQL injection issues) - - ;; :first_err - ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) - ;; (if val - ;; (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) - ;; - ;; ;; :first_warn - ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f))) - ;; (if val - ;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? 
AND item_path=?;" val run-id test-name item-path))) - - (let ((category (hash-table-ref/default otherdat ":category" "")) - (variable (hash-table-ref/default otherdat ":variable" "")) - (value (hash-table-ref/default otherdat ":value" #f)) - (expected (hash-table-ref/default otherdat ":expected" "n/a")) - (tol (hash-table-ref/default otherdat ":tol" "n/a")) - (units (hash-table-ref/default otherdat ":units" "")) - (type (hash-table-ref/default otherdat ":type" "")) - (dcomment (hash-table-ref/default otherdat ":comment" ""))) - (debug:print 4 *default-log-port* - "category: " category ", variable: " variable ", value: " value - ", expected: " expected ", tol: " tol ", units: " units) - (if (and value) ;; require only value; BB was- all three required - (let ((dat (conc category "," - variable "," - value "," - expected "," - tol "," - units "," - dcomment ",," ;; extra comma for status - type ))) - ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. - (rmt:csv->test-data run-id test-id - dat) - ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start" - ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue. - (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :) - ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server. - ))) - - ;; need to update the top test record if PASS or FAIL and this is a subtest - ;;;;;; (if (not (equal? item-path "")) - ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) - - (if (or (and (string? 
comment) - (string-match (regexp "\\S+") comment)) - waived) - (let ((cmt (if waived waived comment))) - (rmt:general-call 'set-test-comment run-id cmt test-id))))) - -(define (tests:test-set-toplog! run-id test-name logf) - (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name)) - -(define (tests:summarize-items run-id test-id test-name force) - ;; if not force then only update the record if one of these is true: - ;; 1. logf is "log/final.log - ;; 2. logf is same as outputfilename - (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) - (orig-dir (current-directory)) - (logf-info (rmt:test-get-logfile-info run-id test-name)) - (logf (if logf-info (cadr logf-info) #f)) - (path (if logf-info (car logf-info) #f))) - ;; This query finds the path and changes the directory to it for the test - (if (and (string? path) - (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ... - (begin - (debug:print 4 *default-log-port* "Found path: " path) - (change-directory path)) - ;; (set! outputfilename (conc path "/" outputfilename))) - (debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path)) - (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) - (if (or (equal? logf "logs/final.log") - (equal? 
logf outputfilename) - force) - (let ((my-start-time (current-seconds)) - (lockf (conc outputfilename ".lock"))) - (let loop ((have-lock (common:simple-file-lock lockf))) - (if have-lock - (let ((script (configf:lookup *configdat* "testrollup" test-name))) - (print "Obtained lock for " outputfilename) - (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) - (if script - (system (conc script " > " outputfilename " & ")) - (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) - (common:simple-file-release-lock lockf) - (change-directory orig-dir) - ;; NB// tests:test-set-toplog! is remote internal... - (tests:test-set-toplog! run-id test-name outputfilename)) - ;; didn't get the lock, check to see if current update started later than this - ;; update, if so we can exit without doing any work - (if (> my-start-time (handle-exceptions - exn - 0 - (file-modification-time lockf))) - ;; we started since current re-gen in flight, delay a little and try again - (begin - (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") - (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds - (loop (common:simple-file-lock lockf)))))))))) - -(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) - (let ((counts (make-hash-table)) - (statecounts (make-hash-table)) - (outtxt "") - (tot 0) - (testdat (rmt:test-get-records-for-index-file run-id test-name))) - (with-output-to-file outputfilename - (lambda () - (set! outtxt (conc outtxt "Summary: " test-name - "

Summary for " test-name "

")) - (for-each - (lambda (testrecord) - (let ((id (vector-ref testrecord 0)) - (itempath (vector-ref testrecord 1)) - (state (vector-ref testrecord 2)) - (status (vector-ref testrecord 3)) - (run_duration (vector-ref testrecord 4)) - (logf (vector-ref testrecord 5)) - (comment (vector-ref testrecord 6))) - (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) - (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) - (set! outtxt (conc outtxt "" - ;; " " itempath "" - " " itempath "" - "" state "" - "" status "" - "" (if (equal? comment "") - " " - comment) "" - "")))) - (if (list? testdat) - testdat - (begin - (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) - '()))) - - (print "
") - ;; Print out stats for status - (set! tot 0) - (print "") - (for-each (lambda (state) - (set! tot (+ tot (hash-table-ref statecounts state))) - (print "")) - (hash-table-keys statecounts)) - (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") - (print "
") - ;; Print out stats for state - (set! tot 0) - (print "") - (for-each (lambda (status) - (set! tot (+ tot (hash-table-ref counts status))) - (print "")) - (hash-table-keys counts)) - (print "

Status stats

" status - "" (hash-table-ref counts status) "
Total" tot "
") - (print "
") - - (print "" - "" - outtxt "
ItemStateStatusComment
") - ;; (release-dot-lock outputfilename) - ;;(rmt:update-run-stats - ;; run-id - ;; (hash-table-map - ;; state-status-counts - ;; (lambda (key val) - ;; (append key (list val))))) - )))) - -(define tests:css-jscript-block -#< -ul.LinkedList { display: block; } -/* ul.LinkedList ul { display: none; } */ -.HandCursorStyle { cursor: pointer; cursor: hand; } /* For IE */ -th {background-color: #8c8c8c;} -td.test {background-color: #d9dbdd;} -td.PASS {background-color: #347533;} -td.FAIL {background-color: #cc2812;} -td.SKIP{background-color: #FFD733;} -td.WARN {background-color: #EA8724;} -td.WAIVED {background-color: #838A12;} -td.ABORT{background-color: #EA24B7;} -.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;} - - - - - - -EOF -) - -(define tests:css-jscript-block-dynamic -#< -EOF -) - -(define (test:js-block javascript-lib) - (conc "" )) - - -(define tests:css-jscript-block-static (test:js-block *java-script-lib*)) - -(define (tests:css-jscript-block-cond dynamic) - (if (equal? 
dynamic #t) - tests:css-jscript-block-dynamic - tests:css-jscript-block-static)) - - -(define (tests:run-record->test-path run numkeys) - (append (take (vector->list run) numkeys) - (list (vector-ref run (+ 1 numkeys))))) - - -(define (tests:get-rest-data runs header numkeys) - (let ((resh (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-id (db:get-value-by-header run header "id")) - (run-dir (tests:run-record->test-path run numkeys)) - (test-data (rmt:get-tests-for-run - run-id - "%" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f))) - - (map (lambda (test) - (let* ((test-name (vector-ref test 2)) - (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13))) - (test-item (conc test-name ":" (vector-ref test 11))) - (test-status (vector-ref test 4))) - - (if (not (hash-table-ref/default resh test-name #f)) - (hash-table-set! resh test-name (make-hash-table))) - (if (not (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f)) - (hash-table-set! (hash-table-ref/default resh test-name #f) test-item (make-hash-table))) - (hash-table-set! (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f) run-id (list test-status test-html-path)))) - test-data))) - runs) - resh)) - - -;; hash-table tree to html list tree -;; -;; tipfunc takes two parameters: y the tip value and path the path to that point -;; -(define (common:htree->html ht path tipfunc) - (let ((datlist (sort (hash-table->alist ht) - (lambda (a b) - (string< (car a)(car b)))))) - (if (null? datlist) - (tipfunc #f path) ;; really shouldn't get here - (s:ul - (map (lambda (x) - (let* ((levelname (car x)) - (y (cdr x)) - (newpath (append path (list levelname))) - (leaf (or (not (hash-table? y)) - (null? 
(hash-table-keys y))))) - (if leaf - (s:li (tipfunc y newpath)) - (s:li - (list - levelname - (common:htree->html y newpath tipfunc)))))) - datlist))))) - - -;; tests:genrate dashboard body -;; - -(define (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt) - (let* ((start (* page pg-size)) - ;(runsdat (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys))) - (runsdat (rmt:get-runs-by-patt keys run-patt target-patt start pg-size #f 0 sort-order: "desc")) - ; db:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-update - (header (vector-ref runsdat 0)) - (runs (vector-ref runsdat 1)) - (ctr 0) - (test-runs-hash (tests:get-rest-data runs header numkeys)) - (test-list (hash-table-keys test-runs-hash))) - - (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag) - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (get-prev-links page linktree) - (get-next-links page linktree total-runs) - - (s:h1 "Summary for " area-name) - (s:h3 "Filter" ) - (s:input 'type "text" 'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()") - ;; top list - - (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 - (map (lambda (key) - (let* ((res (s:tr 'class "something" - (s:th key ) - (map (lambda (run) - (s:th (vector-ref run ctr))) - runs)))) - (set! 
ctr (+ ctr 1)) - res)) - keys) - (s:tr - (s:th "Run Name") - (map (lambda (run) - (s:th (db:get-value-by-header run header "runname"))) - runs)) - - (map (lambda (test-name) - (let* ((item-hash (hash-table-ref/default test-runs-hash test-name #f)) - (item-keys (sort (hash-table-keys item-hash) string<=?))) - (map (lambda (item-name) - (let* ((res (s:tr 'class item-name - (s:td item-name 'class "test" ) - (map (lambda (run) - (let* ((run-test (hash-table-ref/default item-hash item-name #f)) - (run-id (db:get-value-by-header run header "id")) - (result (hash-table-ref/default run-test run-id "n/a")) - ;(relative-path (get-relative-path)) - (status (if (string? result) - result - (car result))) - (link (if (string? result) - result - (if (equal? flag #t) - (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname=" item-name )) - (s:a (car result) 'href (string-substitute (conc linktree "/") "" (cadr result) "-")))))) - (s:td link 'class status))) - runs)))) - res)) - item-keys))) - test-list)))))) - -;; (tests:create-html-tree "test-index.html") -;; -(define (tests:create-html-tree outf) - (let* ((lockfile (conc outf ".lock")) - (runs-to-process '()) - (linktree (common:get-linktree)) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (run-patt (or (args:get-arg "-run-patt") - (args:get-arg "-runname") - "%")) - (target (or (args:get-arg "-target-patt") - (args:get-arg "-target") - "%")) - (targlist (string-split target "/")) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "%")) - targlist)) - (target-patt (string-join targtweaked "/")) - ;(total-runs (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target - (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) - (pg-size 10)) - (if (common:simple-file-lock lockfile) - (begin - ;(print total-runs) - (let loop ((page 0)) - (let* ((oup (open-output-file (or outf (conc 
linktree "/page" page ".html")))) - (get-prev-links (lambda (page linktree ) - (let* ((link (if (not (eq? page 0)) - (s:a "<<prev" 'href (conc "page" (- page 1) ".html")) - (s:a "" 'href (conc "page" page ".html"))))) - link))) - (get-next-links (lambda (page linktree total-runs) - (let* ((link (if (> total-runs (+ 10 (* page pg-size))) - (s:a "next>>" 'href (conc "page" (+ page 1) ".html")) - (s:a "" 'href (conc "page" page ".html"))))) - link))) ) - (print "total runs: " total-runs) - (s:output-new - oup - (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function - (close-output-port oup) - ; (set! page (+ 1 page)) - (if (> total-runs (* (+ 1 page) pg-size)) - (loop (+ 1 page))))) - (common:simple-file-release-lock lockfile)) - - #f))) - - -(define (tests:readlines filename) - (call-with-input-file filename - (lambda (p) - (let loop ((line (read-line p)) - (result '())) - (if (eof-object? line) - (reverse result) - (loop (read-line p) (cons line result))))))) - -(define (tests:get-test-log run-id test-name item-name) - (let* ((test-data (rmt:get-tests-for-run - (string->number run-id) - test-name ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (path "") - (found 0)) - (debug:print-info 0 *default-log-port* "found: " found ) - - (let loop ((hed (car test-data)) - (tal (cdr test-data))) - (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13)) - - (if (equal? (vector-ref hed 11) item-name) - (begin - (set! found 1) - (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13))))) - (if (and (not (null? tal)) (equal? found 0)) - (loop (car tal)(cdr tal)))) - (if (equal? path "") - "

Data not found

" - (string-join (tests:readlines path) "\n")))) - - -(define (tests:dynamic-dboard page) -;(define (tests:create-html-tree o) - (let* ( -;(page "1") - (linktree (common:get-linktree)) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (targtweaked (make-list numkeys "%")) - (target-patt (string-join targtweaked "/")) - (total-runs (rmt:get-num-runs "%")) - (pg-size 10) - (pg (if (equal? page #f) - 0 - (- (string->number page) 1))) - (get-prev-links (lambda (pg linktree) - (debug:print-info 0 *default-log-port* "val: " (- 1 pg)) - (let* ((link (if (not (eq? pg 0)) - (s:a "<<prev " 'href (conc "dashboard?page=" pg )) - (s:a "" 'href (conc "dashboard?page=" pg))))) - link))) - (get-next-links (lambda (pg linktree total-runs) - (debug:print-info 0 *default-log-port* "val: " pg) - (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size) - - (let* ((link (if (> total-runs (+ 10 (* pg pg-size))) - (s:a "next>> " 'href (conc "dashboard?page=" (+ pg 2) )) - (s:a "" 'href (conc "dashboard?page=" pg ))))) - link))) - (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function - html-body)) - -(define (tests:create-html-summary outf) - (let* ((lockfile (conc outf ".lock")) - (linktree (common:get-linktree)) - (keys (rmt:get-keys)) - (area-name (common:get-testsuite-name)) - (run-patt (or (args:get-arg "-run-patt") - (args:get-arg "-runname") - "%")) - (target (or (args:get-arg "-target-patt") - (args:get-arg "-target") - "%")) - (targlist (string-split target "/")) - (numkeys (length keys)) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "%")) - targlist)) - (target-patt (string-join targtweaked "/"))) - (if (common:simple-file-lock lockfile) - (begin - (let* (;(runsdat1 (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys))) - 
(runsdat (rmt:get-runs-by-patt keys run-patt target-patt #f #f #f 0)) - (runs (vector-ref runsdat 1)) - (header (vector-ref runsdat 0)) - (oup (open-output-file (or outf (conc linktree "/targets.html")))) - (target-hash (test:create-target-hash runs header (length keys)))) - (test:create-target-html target-hash oup area-name linktree) - (test:create-run-html runs area-name linktree (length keys) header)) - (common:simple-file-release-lock lockfile)) - #f))) - -(define (test:get-test-hash test-data) - (let ((resh (make-hash-table))) - (map (lambda (test) - (let* ((test-name (vector-ref test 2)) - (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html")) - (conc (vector-ref test 10) "/test-summary.html" ) - (conc (vector-ref test 10) "/" (vector-ref test 13)))) - (test-item (vector-ref test 11)) - (test-status (vector-ref test 4))) - (if (not (hash-table-ref/default resh test-item #f)) - (hash-table-set! resh test-item (make-hash-table))) - (hash-table-set! (hash-table-ref/default resh test-item #f) test-name (list test-status test-html-path)))) - test-data) -resh)) - -(define (test:get-data->b-keys ordered-data a-keys) - (delete-duplicates - (sort (apply - append - (map (lambda (sub-key) - (let ((subdat (hash-table-ref ordered-data sub-key))) - (hash-table-keys subdat))) - a-keys)) - string>=?))) - - -(define (test:create-run-html runs area-name linktree numkeys header) - (map (lambda (run) - (let* ((target (string-join (take (vector->list run) numkeys) "/")) - (run-name (db:get-value-by-header run header "runname")) - (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time"))) - (oup (if (file-exists? 
(conc linktree "/" target "/" run-name)) - (open-output-file (conc linktree "/" target "/" run-name "/run.html")) - #f)) - (run-id (db:get-value-by-header run header "id")) - (test-data (rmt:get-tests-for-run - run-id - "%" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (item-test-hash (test:get-test-hash test-data)) - (items (hash-table-keys item-test-hash)) - (test-names (test:get-data->b-keys item-test-hash items))) - (if oup - (begin - (s:output-new - oup - (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) - (s:title "Runs View " run-name) - (s:body - (s:h1 "Runs View " ) - (s:h3 "Target" target) - (s:p - (s:b "Run name" ) run-name) - (s:p - (s:b "Run Date" ) run-time) - (s:table 'border 1 'cellspacing 0 - (s:tr - (s:th "Items") - (map (lambda (test) - (s:th test)) - test-names)) - (map (lambda (item) - (let* ((test-hash (hash-table-ref/default item-test-hash item #f))) - (if test-hash - (begin - (s:tr - (s:td 'class "test" item) - (map (lambda (test) - (let* ((test-details (hash-table-ref/default test-hash test #f)) - (status (if test-details - (car test-details))) - (link (if test-details - (string-substitute (conc linktree "/" target "/" run-name "/") "" (cadr test-details) "-")))) - (if test-details - (s:td 'class status - (s:a 'class "link" 'href link status )) - (s:td "")))) - test-names)))))) - (sort items string<=?)))))) - (close-output-port oup)) - (debug:print-info 0 "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. 
Megatest will not create run.html")))) -runs)) - -(define (test:create-target-hash runs header numkeys) - (let ((resh (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-name (db:get-value-by-header run header "runname")) - (target (string-join (take (vector->list run) numkeys) "/")) - (run-list (hash-table-ref/default resh target #f))) - - (if (not run-list) - (hash-table-set! resh target (list run-name)) - (hash-table-set! resh target (cons run-name run-list))))) - runs) - resh)) - -(define (test:get-max-run-cnt target-hash targets) - (let* ((cnt 0 )) - (map (lambda (target) - (let* ((runs (hash-table-ref/default target-hash target #f)) - (run-length (if runs - (length runs) - 0))) - - (if (< cnt run-length) - (set! cnt run-length)))) - targets) -cnt)) - -(define (test:pad-runs target-hash targets max-row-length) - (map (lambda (target) - (let loop ((run-list (hash-table-ref/default target-hash target #f))) - (if (< (length run-list) max-row-length) - (begin - (hash-table-set! target-hash target (cons "" run-list)) - (loop (hash-table-ref/default target-hash target #f) ))))) - targets) - target-hash) - -(define (test:create-target-html target-hash oup area-name linktree) - (let* ((targets (hash-table-keys target-hash)) - (max-row-length (test:get-max-run-cnt target-hash targets)) - (pad-runs-hash (test:pad-runs target-hash targets max-row-length))) - (s:output-new - oup - (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) - - (s:title "Target View " area-name) - (s:body - (s:h1 "Target View " area-name) - (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 - (s:tr 'class "something" - (s:th "Target") - (s:th 'colspan max-row-length "Runs")) - (let* ((tbl (map (lambda (target) - (s:tr - (s:td 'class "test" target) - (let* ((runs (hash-table-ref/default target-hash target #f)) - (rest-row (map (lambda (run) - (if (equal? 
run "") - (s:td run) - (if (file-exists?(conc linktree "/" target "/" run )) - (begin - (s:td - (s:a 'href (conc target "/" run "/run.html") run)))))) - (reverse runs)))) - rest-row))) - targets))) - tbl))))) - (close-output-port oup))) - - -(define (tests:create-html-tree-old outf) - (let* ((lockfile (conc outf ".lock")) - (runs-to-process '())) - (if (common:simple-file-lock lockfile) - (let* ((linktree (common:get-linktree)) - (oup (open-output-file (or outf (conc linktree "/runs-index.html")))) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (runsdat (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys))) - (header (vector-ref runsdat 0)) - (runs (vector-ref runsdat 1)) - (runtreedat (map (lambda (x) - (tests:run-record->test-path x numkeys)) - runs)) - (runs-htree (common:list->htree runtreedat))) - (set! runs-to-process runs) - (s:output-new - oup - (s:html tests:css-jscript-block - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (s:h1 "Summary for " area-name) - ;; top list - (s:ul 'id "LinkedList1" 'class "LinkedList" - (s:li - "Runs" - (common:htree->html runs-htree - '() - (lambda (x p) - (let* ((targ-path (string-intersperse p "/")) - (full-path (conc linktree "/" targ-path)) - (run-name (car (reverse p)))) - (if (and (common:file-exists? full-path) - (directory? full-path) - (file-write-access? 
full-path)) - (s:a run-name 'href (conc targ-path "/run-summary.html")) - (begin - (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html") - (conc run-name " (Not able to create summary at " targ-path ")"))))))))))) - (close-output-port oup) - (common:simple-file-release-lock lockfile) - - (for-each - (lambda (run) - (let* ((test-subpath (tests:run-record->test-path run numkeys)) - (run-id (db:get-value-by-header run header "id")) - (run-dir (tests:run-record->test-path run numkeys)) - (test-dats (rmt:get-tests-for-run - run-id - "%/" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (tests-tree-dat (map (lambda (test-dat) - ;; (tests:run-record->test-path x numkeys)) - (let* ((test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (full-name (db:test-make-full-name test-name item-path)) - (path-parts (string-split full-name))) - path-parts)) - test-dats)) - (tests-htree (common:list->htree tests-tree-dat)) - (html-dir (conc linktree "/" (string-intersperse run-dir "/"))) - (html-path (conc html-dir "/run-summary.html")) - (oup (if (and (common:file-exists? html-dir) - (directory? html-dir) - (file-write-access? 
html-dir)) - (open-output-file html-path) - #f))) - ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat) - (if oup - (begin - (s:output-new - oup - (s:html tests:css-jscript-block - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (s:h1 "Summary for " (string-intersperse run-dir "/")) - ;; top list - (s:ul 'id "LinkedList1" 'class "LinkedList" - (s:li - "Tests" - (common:htree->html tests-htree - '() - (lambda (x p) - (let* ((targ-path (string-intersperse p "/")) - (test-name (car p)) - (item-path ;; (if (> (length p) 2) ;; test-name + run-name - (string-intersperse p "/")) - (full-targ (conc html-dir "/" targ-path)) - (std-file (conc full-targ "/test-summary.html")) - (alt-file (conc full-targ "/megatest-rollup-" test-name ".html")) - (html-file (if (common:file-exists? alt-file) - alt-file - std-file)) - (run-name (car (reverse p)))) - (if (and (not (common:file-exists? full-targ)) - (directory? full-targ) - (file-write-access? full-targ)) - (tests:summarize-test - run-id - (rmt:get-test-id run-id test-name item-path))) - (if (common:file-exists? full-targ) - (s:a run-name 'href html-file) - (begin - (debug:print 0 *default-log-port* "ERROR: can't access " full-targ) - (conc "No summary for " run-name))))) - )))))) - (close-output-port oup))))) - runs) - #t) - #f))) - - - - - - - -;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! 
-;; -;; get a pretty table to summarize steps -;; -;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f)) -(define (tests:process-steps-table steps);; db test-id #!key (work-area #f)) -;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) - ;; organise the steps for better readability - (let ((res (make-hash-table))) - (for-each - (lambda (step) - (debug:print 6 *default-log-port* "step=" step) - (let ((record (hash-table-ref/default - res - (tdb:step-get-stepname step) - ;; 0 1 2 3 4 5 6 7 - ;; stepname start end status Duration Logfile Comment first-id - (vector (tdb:step-get-stepname step) "" "" "" "" "" "" #f)))) - (debug:print 6 *default-log-port* "record(before) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)) - (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set - (case (string->symbol (tdb:step-get-state step)) - ((start)(vector-set! record 1 (tdb:step-get-event_time step)) - (vector-set! record 3 (if (equal? (vector-ref record 3) "") - (tdb:step-get-status step))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - ((end) - (vector-set! record 2 (any->number (tdb:step-get-event_time step))) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) - (endt (any->number (vector-ref record 2)))) - (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) - ", startt=" startt ", endt=" endt - ", get-status: " (tdb:step-get-status step)) - (if (and (number? startt)(number? endt)) - (seconds->hr-min-sec (- endt startt)) "-1"))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! 
record 5 (tdb:step-get-logfile step))) - (if (> (string-length (tdb:step-get-comment step)) - 0) - (vector-set! record 6 (tdb:step-get-comment step)))) - (else - (vector-set! record 2 (tdb:step-get-state step)) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (tdb:step-get-event_time step)) - (vector-set! record 6 (tdb:step-get-comment step)))) - (hash-table-set! res (tdb:step-get-stepname step) record) - (debug:print 6 *default-log-port* "record(after) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)))) - ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) - (sort steps (lambda (a b) - (cond - ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) - ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) - (< (tdb:step-get-id a) (tdb:step-get-id b))) - (else #f))))) - res)) - -;; -;; -(define (tests:get-compressed-steps run-id test-id) - (let* ((steps-data (rmt:get-steps-for-test run-id test-id)) ;; 0 1 2 3 4 5 6 7 - (comprsteps (tests:process-steps-table steps-data))) ;; # - (map (lambda (x) - ;; take advantage of the \n on time->string - (vector ;; we are constructing basically the original vector but collapsing start end records - (vector-ref x 0) ;; id 0 - (let ((s (vector-ref x 1))) - (if (number? s)(seconds->time-string s) s)) ;; starttime 1 - (let ((s (vector-ref x 2))) - (if (number? s)(seconds->time-string s) s)) ;; endtime 2 - (vector-ref x 3) ;; status 3 - (vector-ref x 4) ;; duration 4 - (vector-ref x 5) ;; logfile 5 - (vector-ref x 6) ;; comment 6 - (vector-ref x 7))) ;; id 7 - (sort (hash-table-values comprsteps) - (lambda (a b) - (let ((time-a (vector-ref a 1)) - (time-b (vector-ref b 1)) - (id-a (vector-ref a 7)) - (id-b (vector-ref b 7))) - (if (and (number? time-a)(number? time-b)) - (if (< time-a time-b) - #t - (if (eq? 
time-a time-b) - (< id-a id-b) - ;; (stringwork-week/day-time - (db:test-get-event_time test-dat))) - (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) - (s:h3 "Log files") - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) - (map (lambda (step-dat) - (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) - (s:td (tdb:steps-table-get-start step-dat)) - (s:td (tdb:steps-table-get-end step-dat)) - (s:td (tdb:steps-table-get-status step-dat)) - (s:td (tdb:steps-table-get-runtime step-dat)) - (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) - (s:a 'href step-log step-log))))) - steps-dat)) - ))) - (close-output-port oup))))) - - -;; MUST BE CALLED local! -;; -(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '())) - ;; BUG: Move the values derived from args to parameters and push to megatest.scm - (let* ((testpatt (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) - (statepatt (or (args:get-arg "-state") (args:get-arg ":state") "%")) - (statuspatt (or (args:get-arg "-status") (args:get-arg ":status") "%")) - (runname (or (args:get-arg "-runname") (args:get-arg ":runname") "%")) - (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res - testpatt - statepatt - statuspatt - runname))) - (if fnamepatt - (apply append - (map (lambda (p) - (if (directory-exists? p) - (let ((glob-query (conc p "/" fnamepatt))) - (handle-exceptions - exn - (with-input-from-pipe - (conc "echo " glob-query) - read-lines) ;; we aren't going to try too hard. 
If glob breaks it is likely because someone tried to do */*/*.log or similar - (glob glob-query))) - '())) - paths-from-db)) - paths-from-db))) - - -;;====================================================================== -;; Gather data from test/task specifications -;;====================================================================== - -;; (define (tests:get-valid-tests testsdir test-patts) ;; #!key (test-names '())) -;; (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*"))))) -;; (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests)) -;; (delete-duplicates -;; (filter (lambda (testname) -;; (tests:match test-patts testname #f)) -;; (map (lambda (testp) -;; (last (string-split testp "/"))) -;; tests))))) - -(define (tests:get-test-path-from-environment) - (if (and (getenv "MT_LINKTREE") - (getenv "MT_TARGET") - (getenv "MT_RUNNAME") - (getenv "MT_TEST_NAME") - (getenv "MT_ITEMPATH")) - (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - (getenv "MT_TEST_NAME") - (if (and (getenv "MT_ITEMPATH") - (not (string=? "" (getenv "MT_ITEMPATH")))) - (conc "/" (getenv "MT_ITEMPATH")) - "")) - #f)) - -;; if .testconfig exists in test directory read and return it -;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" -;; else read the testconfig file -;; if have path to test directory save the config as .testconfig and return it -;; -(define (tests:get-testconfig test-name item-path test-registry system-allowed - #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f)) - (let* ((use-cache (common:use-cache?)) - (cache-path (tests:get-test-path-from-environment)) - (cache-file (and cache-path (conc cache-path "/.testconfig"))) - (cache-exists (and cache-file - (not force-create) ;; if force-create then pretend there is no cache to read - (common:file-exists? 
cache-file))) - (cached-dat (if (and (not force-create) - cache-exists - use-cache) - (handle-exceptions - exn - #f ;; any issues, just give up with the cached version and re-read - (configf:read-alist cache-file)) - #f)) - (test-full-name (if (and item-path (not (string-null? item-path))) - (conc test-name "/" item-path) - test-name))) - (if cached-dat - cached-dat - (let ((dat (hash-table-ref/default *testconfigs* test-full-name #f))) - (if (and dat ;; have a locally cached version - (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data? - dat - ;; no cached data available - (let* ((treg (or test-registry - (tests:get-all))) - (test-path (or (hash-table-ref/default treg test-name #f) - (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - test-name "/" item-path)) - (local-tcfg (conc local-tcdir "/testconfig"))) - (if (common:file-exists? local-tcfg) - local-tcdir - #f)) - (conc *toppath* "/tests/" test-name))) - (test-configf (conc test-path "/testconfig")) - (testexists (let loopa ((tries-left 30)) - (cond - ( - (and (common:file-exists? test-configf)(file-read-access? test-configf)) - #t) - ( - (common:file-exists? test-configf) - (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf) - #f) - ( - (and wait-a-minute (> tries-left 0)) - (thread-sleep! 10) - (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires - (loopa (sub1 tries-left))) - (else - (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires - #f)))) - (tcfg (if testexists - (read-config test-configf #f system-allowed - environ-patt: (if system-allowed - "pre-launch-env-vars" - #f)) - #f))) - (if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data - (if tcfg (hash-table-set! 
*testconfigs* test-full-name tcfg)) - (if (and testexists - cache-file - (file-write-access? cache-path) - allow-write-cache) - (let ((tpath (conc cache-path "/.testconfig"))) - (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) - (if (and tcfg (not (common:in-running-test?))) - (configf:write-alist tcfg tpath)))) - tcfg)))))) - -;; sort tests by priority and waiton -;; Move test specific stuff to a test unit FIXME one of these days -(define (tests:sort-by-priority-and-waiton test-records) - (if (eq? (hash-table-size test-records) 0) - '() - (let* ((mungepriority (lambda (priority) - (if priority - (let ((tmp (any->number priority))) - (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) - 0))) - (all-tests (hash-table-keys test-records)) - (all-waited-on (let loop ((hed (car all-tests)) - (tal (cdr all-tests)) - (res '())) - (let* ((trec (hash-table-ref test-records hed)) - (waitons (or (tests:testqueue-get-waitons trec) '()))) - (if (null? tal) - (append res waitons) - (loop (car tal)(cdr tal)(append res waitons)))))) - (sort-fn1 - (lambda (a b) - (let* ((a-record (hash-table-ref test-records a)) - (b-record (hash-table-ref test-records b)) - (a-waitons (or (tests:testqueue-get-waitons a-record) '())) - (b-waitons (or (tests:testqueue-get-waitons b-record) '())) - (a-config (tests:testqueue-get-testconfig a-record)) - (b-config (tests:testqueue-get-testconfig b-record)) - (a-raw-pri (configf:lookup a-config "requirements" "priority")) - (b-raw-pri (configf:lookup b-config "requirements" "priority")) - (a-priority (mungepriority a-raw-pri)) - (b-priority (mungepriority b-raw-pri))) - (tests:testqueue-set-priority! a-record a-priority) - (tests:testqueue-set-priority! b-record b-priority) - ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) - (cond - ;; is - ((member a b-waitons) ;; is b waiting on a? 
- ;; (debug:print 0 *default-log-port* "case1") - #t) - ((member b a-waitons) ;; is a waiting on b? - ;; (debug:print 0 *default-log-port* "case2") - #f) - ((and (not (null? a-waitons)) ;; both have waitons - do not disturb - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case2.1") - #t) - ((and (null? a-waitons) ;; no waitons for a but b has waitons - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case3") - #f) - ((and (not (null? a-waitons)) ;; a has waitons but b does not - (null? b-waitons)) - ;; (debug:print 0 *default-log-port* "case4") - #t) - ((not (eq? a-priority b-priority)) ;; use - (> a-priority b-priority)) - (else - ;; (debug:print 0 *default-log-port* "case5") - (string>? a b)))))) - - (sort-fn2 - (lambda (a b) - (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) - (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) - ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) - ;; (debug:print "dot-res=" dot-res)) - ;; (let ((data (map cdr (filter - ;; (lambda (x)(equal? 
"node" (car x))) - ;; (map string-split (tests:easy-dot test-records "plain")))))) - ;; (map car (sort data (lambda (a b) - ;; (> (string->number (caddr a))(string->number (caddr b))))))) - ;; )) - (sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table - -(define (tests:easy-dot test-records outtype) - (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX")))) - (let ((all-testnames (hash-table-keys test-records)) - (temp-port (open-output-file* fd))) - ;; (format temp-port "This file is ~A.~%" temp-path) - (format temp-port "digraph tests {\n") - (format temp-port " size=4,8\n") - ;; (format temp-port " splines=none\n") - (for-each - (lambda (testname) - (let* ((testrec (hash-table-ref test-records testname)) - (waitons (or (tests:testqueue-get-waitons testrec) '()))) - (for-each - (lambda (waiton) - (format temp-port (conc " " waiton " -> " testname " [splines=ortho]\n"))) - waitons))) - all-testnames) - (format temp-port "}\n") - (close-output-port temp-port) - (with-input-from-pipe - (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path) - (lambda () - (let ((res (read-lines))) - ;; (delete-file temp-path) - res)))))) - -(define (tests:write-dot-file test-records fname sizex sizey) - (if (file-write-access? (pathname-directory fname)) - (with-output-to-file fname - (lambda () - (map print (tests:tests->dot test-records sizex sizey)))))) - -(define (tests:tests->dot test-records sizex sizey) - (let ((all-testnames (hash-table-keys test-records))) - (if (null? all-testnames) - '() - (let loop ((hed (car all-testnames)) - (tal (cdr all-testnames)) - (res (list "digraph tests {" - (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";") - " ratio=0.95;" - ))) - (let* ((testrec (hash-table-ref test-records hed)) - (waitons (or (tests:testqueue-get-waitons testrec) '())) - (newres (append res - (if (null? 
waitons) - (list (conc " \"" hed "\" [shape=box];")) - (map (lambda (waiton) - (conc " \"" waiton "\" -> \"" hed "\" [shape=box];")) - waitons) - )))) - (if (null? tal) - (append newres (list "}")) - (loop (car tal)(cdr tal) newres) - )))))) - -;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain") - -(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats - (let-values (((inp oup pid)(process "env -i PATH=$PATH dot" (list "-T" outtype)))) - (with-output-to-port oup - (lambda () - (map print indat))) - (close-output-port oup) - (let ((res (with-input-from-port inp - (lambda () - (read-lines))))) - (close-input-port inp) - res))) - -;; read data from tmp file or create if not exists -;; if exists regen in background -;; -(define (tests:lazy-dot testrecords outtype sizex sizey) - (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot")) - (fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat"))) - (tests:write-dot-file testrecords dfile sizex sizey) - (if (common:file-exists? 
fname) - (let ((res (with-input-from-file fname - (lambda () - (read-lines))))) - (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&")) - res) - (begin - (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname)) - (with-input-from-file fname - (lambda () - (read-lines))))))) - - -;; for each test: -;; -(define (tests:filter-non-runnable run-id testkeynames testrecordshash) - (let ((runnables '())) - (for-each - (lambda (testkeyname) - (let* ((test-record (hash-table-ref testrecordshash testkeyname)) - (test-name (tests:testqueue-get-testname test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) - (item-path (tests:testqueue-get-item_path test-record)) - (waitons (tests:testqueue-get-waitons test-record)) - (keep-test #t) - (test-id (rmt:get-test-id run-id test-name item-path)) - (tdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) - (if tdat - (begin - ;; Look at the test state and status - (if (or (and (member (db:test-get-status tdat) - '("PASS" "WARN" "WAIVED" "CHECK" "SKIP")) - (equal? (db:test-get-state tdat) "COMPLETED")) - (member (db:test-get-state tdat) - '("INCOMPLETE" "KILLED"))) - (set! keep-test #f)) - - ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test - ;; from the runnable list - (if keep-test - (for-each (lambda (waiton) - ;; for now we are waiting only on the parent test - (let* ((parent-test-id (rmt:get-test-id run-id waiton "")) - (wtdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) - (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED") - (member (db:test-get-status wtdat) '("FAIL" "ABORT"))) - (member (db:test-get-status wtdat) '("KILLED")) - (member (db:test-get-state wtdat) '("INCOMPETE"))) - ;; (if (or (member (db:test-get-status wtdat) - ;; '("FAIL" "KILLED")) - ;; (member (db:test-get-state wtdat) - ;; '("INCOMPETE"))) - (set! 
keep-test #f)))) ;; no point in running this one again - waitons)))) - (if keep-test (set! runnables (cons testkeyname runnables))))) - testkeynames) - runnables)) - -;;====================================================================== -;; refactoring this block into tests:get-full-data from line 263 of runs.scm -;;====================================================================== -;; hed is the test name -;; test-records is a hash of test-name => test record -(define (tests:get-full-data test-names test-records required-tests all-tests-registry) - (if (not (null? test-names)) - (let loop ((hed (car test-names)) - (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc - (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop") - ;; don't know item-path at this time, let the testconfig get the top level testconfig - (let* ((config (tests:get-testconfig hed #f all-tests-registry 'return-procs)) - (waitons (let ((instr (if config - (configf:lookup config "requirements" "waiton") - (begin ;; No config means this is a non-existant test - (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.") - "")))) - (debug:print-info 8 *default-log-port* "waitons string is " instr) - (string-split (cond - ((procedure? instr) - (let ((res (instr))) - (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed) - res)) - ((string? instr) instr) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed) - "")))))) - (if (not config) ;; this is a non-existant test called in a waiton. - (if (null? 
tal) - test-records - (loop (car tal)(cdr tal))) - (begin - (debug:print-info 8 *default-log-port* "waitons: " waitons) - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member hed waitons) - (begin - (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!") - (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)))) - - ;; (items (items:get-items-from-config config))) - (if (not (hash-table-ref/default test-records hed #f)) - (hash-table-set! test-records - hed (vector hed ;; 0 - config ;; 1 - waitons ;; 2 - (configf:lookup config "requirements" "priority") ;; priority 3 - (let ((items (hash-table-ref/default config "items" #f)) ;; items 4 - (itemstable (hash-table-ref/default config "itemstable" #f))) - ;; if either items or items table is a proc return it so test running - ;; process can know to call items:get-items-from-config - ;; if either is a list and none is a proc go ahead and call get-items - ;; otherwise return #f - this is not an iterated test - (cond - ((procedure? items) - (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") - items) ;; calc later - ((procedure? itemstable) - (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") - itemstable) ;; calc later - ((filter (lambda (x) - (let ((val (car x))) - (if (procedure? val) val #f))) - (append (if (list? items) items '()) - (if (list? itemstable) itemstable '()))) - 'have-procedure) - ((or (list? items)(list? itemstable)) ;; calc now - (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" - " items: " items " itemstable: " itemstable) - (items:get-items-from-config config)) - (else #f))) ;; not iterated - #f ;; itemsdat 5 - #f ;; spare - used for item-path - ))) - (for-each - (lambda (waiton) - (if (and waiton (not (member waiton test-names))) - (begin - (set! 
required-tests (cons waiton required-tests)) - (set! test-names (cons waiton test-names))))) ;; was an append, now a cons - waitons) - (let ((remtests (delete-duplicates (append waitons tal)))) - (if (not (null? remtests)) - (loop (car remtests)(cdr remtests)) - test-records)))))))) - -;;====================================================================== -;; test steps -;;====================================================================== - -;; teststep-set-status! used to be here - -(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) - (let* ((testdat (rmt:get-test-info-by-id run-id test-id))) - (and testdat - (equal? (test:get-state testdat) "KILLREQ")))) - -(define (test:tdb-get-rundat-count tdb) - (if tdb - (let ((res 0)) - (sqlite3:for-each-row - (lambda (count) - (set! res count)) - tdb - "SELECT count(id) FROM test_rundat;") - res)) - 0) - -(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) - (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) - (if (and cpuload diskfree) - (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) - (if minutes - (rmt:general-call 'update-run-duration run-id minutes test-id)) - (if (and uname hostname) - (rmt:general-call 'update-uname-host run-id uname hostname test-id))) - -;; This one is for running with no db access (i.e. 
via rmt: internally) -(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) -;; (define (tests:set-full-meta-info test-id run-id minutes work-area) -;; (let ((remtries 10)) - (let* ((cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (uname (get-uname "-srvpio")) - (hostname (get-host-name))) - (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) - -;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) -#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) - (let* ((cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (remtries 10)) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (print-call-chain (current-error-port)) - (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") - (set! remtries (- remtries 1)) - (thread-sleep! 10) - (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. 
Giving up") - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)))) - (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) - ))) - -;;====================================================================== -;; A R C H I V I N G -;;====================================================================== - -(define (test:archive db test-id) - #f) - -(define (test:archive-tests db keynames target) - #f) - Index: testsmod.scm ================================================================== --- testsmod.scm +++ testsmod.scm @@ -19,42 +19,41 @@ ;;====================================================================== (declare (unit testsmod)) (declare (uses commonmod)) (declare (uses mtargs)) +(declare (uses servermod)) +(declare (uses mtconfigf)) +(declare (uses itemsmod)) +(declare (uses dbmod)) (module testsmod * -(import scheme chicken data-structures extras) +(import scheme chicken data-structures extras files) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable - (prefix mtconfigf configf:) +(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 + format ports srfi-1 matchable + directory-utils regex srfi-13 commonmod + servermod + itemsmod + dbmod + (prefix mtconfigf configf:) (prefix mtargs args:)) + +(include "run_records.scm") +(include "test_records.scm") +(include "db_records.scm") (define *java-script-lib* #f) (define (init-java-script-lib) (set! 
*java-script-lib* (conc (common:get-install-area) "/share/js/jquery-3.1.0.slim.min.js")) ) -;; A routine to map itempaths using a itemmap -;; patha and pathb must be strings or this will fail -;; -;; path-b is waiting on path-a -;; -(define (db:compare-itempaths test-b-name path-a path-b itemmaps ) - (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps) - (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name))) - (if itemmap - (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap))) - (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped) - (equal? path-a path-b-mapped)) - (equal? path-b path-a)))) - ;; A routine to convert test/itempath using a itemmap ;; NOTE: to process only an itempath (i.e. no prepended testname) ;; just call db:multi-pattern-apply ;; (define (db:convert-test-itempath path-in itemmap) @@ -63,18 +62,11 @@ (test-name (if (null? path-parts) "" (car path-parts))) (item-path (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/"))) (conc test-name "/" (db:multi-pattern-apply item-path itemmap)))) -;;====================================================================== -;; Run keys, these are used to hierarchially organise tests and run areas -;;====================================================================== - -(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... - (string-intersperse keys ",")) - -(define (args:usage . a) #f) +#; (define (args:usage . 
a) #f) ;;====================================================================== ;; key <=> target routines ;;====================================================================== @@ -95,72 +87,10 @@ vals) (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys)) vals) (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target."))) -;; given the keys (a list of vectors or a list of keys) and a target return a keyval list -;; keyval list ( (key1 val1) (key2 val2) ...) -(define (keys:target->keyval keys target) - (let* ((targlist (string-split target "/")) - (numkeys (length keys)) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "")) - targlist))) - (map (lambda (key targ) - (list key targ)) - keys targtweaked))) - -;;====================================================================== -;; config file related routines -;;====================================================================== - -(define keys:config-get-fields common:get-fields) -(define (keys:make-key/field-string confdat) - (let ((fields (configf:get-section confdat "fields"))) - (string-join - (map (lambda (field)(conc (car field) " " (cadr field))) - fields) - ","))) - -;; patterns are: -;; "rx1" "replacement1"\n -;; "rx2" "replacement2" -;; etc. -;; -(define (db:multi-pattern-apply item-path itemmap) - (let ((all-patts (string-split itemmap "\n"))) - (if (null? 
all-patts) - item-path - (let loop ((hed (car all-patts)) - (tal (cdr all-patts)) - (res item-path)) - (let* ((parts (string-split hed)) - (patt (car parts)) - - (repl (if (> (length parts) 1)(cadr parts) "")) - - (newr (if (and patt repl) - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) - res) - (string-substitute patt repl res)) - - - ) - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) - res)))) - (if (null? tal) - newr - (loop (car tal)(cdr tal) newr))))))) - ;; given waiting-test that is waiting on waiton-test extend test-patt appropriately ;; ;; genlib/testconfig sim/testconfig ;; genlib/sch sim/sch/cell1 ;; @@ -215,121 +145,18 @@ (cons waiton-test patts)))) (string-intersperse (delete-duplicates new-patts) ","))))) -;; tests:glob-like-match -(define (tests:glob-like-match patt str) - (let ((like (substring-index "%" patt))) - (let* ((notpatt (equal? (substring-index "~" patt) 0)) - (newpatt (if notpatt (substring patt 1) patt)) - (finpatt (if like - (string-substitute (regexp "%") ".*" newpatt #f) - (string-substitute (regexp "\\*") ".*" newpatt #f))) - (res #f)) - ;; (print "tests:glob-like-match => notpatt: " notpatt ", newpatt: " newpatt ", finpatt: " finpatt) - (set! res (string-match (regexp finpatt (if like #t #f)) str)) - (if notpatt (not res) res)))) - -;; if itempath is #f then look only at the testname part -;; -(define (tests:match patterns testname itempath #!key (required '())) - (if (string? patterns) - (let ((patts (append (string-split patterns ",") required))) - (if (null? patts) ;;; no pattern(s) means no match - #f - (let loop ((patt (car patts)) - (tal (cdr patts))) - ;; (print "loop: patt: " patt ", tal " tal) - (if (string=? 
patt "") - #f ;; nothing ever matches empty string - policy - (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) - (test-patt (cadr patt-parts)) - (item-patt (cadddr patt-parts))) - ;; special case: test vs. test/ - ;; test => "test" "%" - ;; test/ => "test" "" - (if (and (not (substring-index "/" patt)) ;; no slash in the original - (or (not item-patt) - (equal? item-patt ""))) ;; should always be true that item-patt is "" - (set! item-patt "%")) - ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) - (if (and (tests:glob-like-match test-patt testname) - (or (not itempath) - (tests:glob-like-match (if item-patt item-patt "") itempath))) - #t - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))))) - -;; if itempath is #f then look only at the testname part -;; -(define (tests:match->sqlqry patterns) - (if (string? patterns) - (let ((patts (string-split patterns ","))) - (if (null? patts) ;;; no pattern(s) means no match, we will do no query - #f - (let loop ((patt (car patts)) - (tal (cdr patts)) - (res '())) - ;; (print "loop: patt: " patt ", tal " tal) - (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) - (test-patt (cadr patt-parts)) - (item-patt (cadddr patt-parts)) - (test-qry (db:patt->like "testname" test-patt)) - (item-qry (db:patt->like "item_path" item-patt)) - (qry (conc "(" test-qry " AND " item-qry ")"))) - ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) - (if (null? tal) - (string-intersperse (append (reverse res)(list qry)) " OR ") - (loop (car tal)(cdr tal)(cons qry res))))))) - #f)) - -;; keys list to key1,key2,key3 ... 
-(define (runs:get-std-run-fields keys remfields) - (let* ((header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (list keystr header))) - -;; make a query (fieldname like 'patt1' OR fieldname -(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) - (let ((patts (if (string? pattstr) - (string-split pattstr ",") - '("%")))) - (string-intersperse (map (lambda (patt) - (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) - (conc fieldname " " wildtype " '" patt "'"))) - (if (null? patts) - '("") - patts)) - comparator))) - ;; Call this one to do all the work and get a standardized list of tests ;; gets paths from configs and finds valid tests ;; returns hash of testname --> fullpath ;; (define (tests:get-all) (let* ((test-search-path (tests:get-tests-search-path *configdat*))) (tests:get-valid-tests (make-hash-table) test-search-path))) -(define (tests:get-tests-search-path cfgdat) - (let ((paths (let ((section (if cfgdat - (configf:get-section cfgdat "tests-paths") - #f))) - (if section - (map cadr section) - '())))) - (filter (lambda (d) - (if (directory-exists? d) - d - (begin - (if (common:low-noise-print 60 "tests:get-tests-search-path" d) - (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) - #f))) - (append paths (list (conc *toppath* "/tests")))))) - (define (tests:get-valid-tests test-registry tests-paths) (if (null? tests-paths) test-registry (let loop ((hed (car tests-paths)) (tal (cdr tests-paths))) @@ -374,49 +201,615 @@ '()) (if itemmap-table itemmap-table '())))) -;; given a list of itemmaps (testname . 
map), return the first match +;;====================================================================== +;; Tests +;;====================================================================== + +;; return items given config +;; +(define (tests:get-items tconfig) + (let ((items (hash-table-ref/default tconfig "items" #f)) ;; items 4 + (itemstable (hash-table-ref/default tconfig "itemstable" #f))) + ;; if either items or items table is a proc return it so test running + ;; process can know to call items:get-items-from-config + ;; if either is a list and none is a proc go ahead and call get-items + ;; otherwise return #f - this is not an iterated test + (cond + ((procedure? items) + (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") + items) ;; calc later + ((procedure? itemstable) + (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") + itemstable) ;; calc later + ((filter (lambda (x) + (let ((val (car x))) + (if (procedure? val) val #f))) + (append (if (list? items) items '()) + (if (list? itemstable) itemstable '()))) + 'have-procedure) + ((or (list? items)(list? 
itemstable)) ;; calc now + (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" + " items: " items " itemstable: " itemstable) + (items:get-items-from-config tconfig)) + (else #f)))) ;; not iterated + + +;; returns waitons waitors tconfigdat +;; +(define (tests:get-waitons test-name all-tests-registry) + (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t) + (let ((instr (if config + (configf:lookup config "requirements" "waiton") + (begin ;; No config means this is a non-existant test + (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") + (exit 1)))) + (instr2 (if config + (configf:lookup config "requirements" "waitor") + ""))) + (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2) + (let ((newwaitons + (string-split (cond + ((procedure? instr) ;; here + (let ((res (instr))) + (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name) + res)) + ((string? instr) instr) + (else + ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) + "")))) + (newwaitors + (string-split (cond + ((procedure? instr2) + (let ((res (instr2))) + (debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name) + res)) + ((string? instr2) instr2) + (else + ;; NOTE: This is actually the case of *no* waitons! 
;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) + ""))))) + (values + ;; the waitons + (filter (lambda (x) + (if (hash-table-ref/default all-tests-registry x #f) + #t + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) + #f))) + newwaitons) + (filter (lambda (x) + (if (hash-table-ref/default all-tests-registry x #f) + #t + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) + #f))) + newwaitors) + config))))) + +;; Check for waiver eligibility +;; +(define (tests:check-waiver-eligibility testdat prev-testdat) + (let* ((test-registry (make-hash-table)) + (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) + (test-rundir ;; (sdb:qry 'passstr + (db:test-get-rundir testdat)) ;; ) + (prev-rundir ;; (sdb:qry 'passstr + (db:test-get-rundir prev-testdat)) ;; ) + (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) + (waiver-rx (regexp "^(\\S+)\\s+(.*)$")) + (diff-rule "diff %file1% %file2%") + (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html")) + (if (not (common:file-exists? test-rundir)) + (begin + (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver") + #f) + (begin + (push-directory test-rundir) + (let ((result (if (null? waivers) + #f + (let loop ((hed (car waivers)) + (tal (cdr waivers))) + (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"") + (let* ((waiver (configf:lookup testconfig "waivers" hed)) + (wparts (if waiver (string-match waiver-rx waiver) #f)) + (waiver-rule (if wparts (cadr wparts) #f)) + (waiver-glob (if wparts (caddr wparts) #f)) + (logpro-file (if waiver + (let ((fname (conc hed ".logpro"))) + (if (common:file-exists? 
fname) + fname + (begin + (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff") + #f))) + #f)) + ;; if rule by name of waiver-rule is found in testconfig - use it + ;; else if waivername.logpro exists use logpro-rule + ;; else default to diff-rule + (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule))) + (if rule + rule + (if logpro-file + logpro-rule + (begin + (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule") + diff-rule))))) + ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t) + (processed-cmd (string-substitute + "%file1%" (conc test-rundir "/" waiver-glob) + (string-substitute + "%file2%" (conc prev-rundir "/" waiver-glob) + (string-substitute + "%waivername%" hed rule-string #t) #t) #t)) + (res #f)) + (debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"") + (if (eq? (system processed-cmd) 0) + (if (null? tal) + #t + (loop (car tal)(cdr tal))) + #f)))))) + (pop-directory) + result))))) + + + + + + + + + + + + + + + + + + + + + + + + +;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! 
+;; +;; get a pretty table to summarize steps +;; +;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f)) +(define (tests:process-steps-table steps);; db test-id #!key (work-area #f)) +;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) + ;; organise the steps for better readability + (let ((res (make-hash-table))) + (for-each + (lambda (step) + (debug:print 6 *default-log-port* "step=" step) + (let ((record (hash-table-ref/default + res + (tdb:step-get-stepname step) + ;; 0 1 2 3 4 5 6 7 + ;; stepname start end status Duration Logfile Comment first-id + (vector (tdb:step-get-stepname step) "" "" "" "" "" "" #f)))) + (debug:print 6 *default-log-port* "record(before) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)) + (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set + (case (string->symbol (tdb:step-get-state step)) + ((start)(vector-set! record 1 (tdb:step-get-event_time step)) + (vector-set! record 3 (if (equal? (vector-ref record 3) "") + (tdb:step-get-status step))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + ((end) + (vector-set! record 2 (any->number (tdb:step-get-event_time step))) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) + (endt (any->number (vector-ref record 2)))) + (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) + ", startt=" startt ", endt=" endt + ", get-status: " (tdb:step-get-status step)) + (if (and (number? startt)(number? endt)) + (seconds->hr-min-sec (- endt startt)) "-1"))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! 
record 5 (tdb:step-get-logfile step))) + (if (> (string-length (tdb:step-get-comment step)) + 0) + (vector-set! record 6 (tdb:step-get-comment step)))) + (else + (vector-set! record 2 (tdb:step-get-state step)) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (tdb:step-get-event_time step)) + (vector-set! record 6 (tdb:step-get-comment step)))) + (hash-table-set! res (tdb:step-get-stepname step) record) + (debug:print 6 *default-log-port* "record(after) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)))) + ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) + (sort steps (lambda (a b) + (cond + ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) + ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) + (< (tdb:step-get-id a) (tdb:step-get-id b))) + (else #f))))) + res)) + +;;====================================================================== +;; Gather data from test/task specifications +;;====================================================================== + +;; (define (tests:get-valid-tests testsdir test-patts) ;; #!key (test-names '())) +;; (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*"))))) +;; (set! tests (filter (lambda (test)(common:file-exists? 
(conc test "/testconfig"))) tests)) +;; (delete-duplicates +;; (filter (lambda (testname) +;; (tests:match test-patts testname #f)) +;; (map (lambda (testp) +;; (last (string-split testp "/"))) +;; tests))))) + +(define (tests:get-test-path-from-environment) + (if (and (getenv "MT_LINKTREE") + (getenv "MT_TARGET") + (getenv "MT_RUNNAME") + (getenv "MT_TEST_NAME") + (getenv "MT_ITEMPATH")) + (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + (getenv "MT_TEST_NAME") + (if (and (getenv "MT_ITEMPATH") + (not (string=? "" (getenv "MT_ITEMPATH")))) + (conc "/" (getenv "MT_ITEMPATH")) + "")) + #f)) + +;; if .testconfig exists in test directory read and return it +;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" +;; else read the testconfig file +;; if have path to test directory save the config as .testconfig and return it +;; +(define (tests:get-testconfig test-name item-path test-registry system-allowed + #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f)) + (let* ((use-cache (common:use-cache?)) + (cache-path (tests:get-test-path-from-environment)) + (cache-file (and cache-path (conc cache-path "/.testconfig"))) + (cache-exists (and cache-file + (not force-create) ;; if force-create then pretend there is no cache to read + (common:file-exists? cache-file))) + (cached-dat (if (and (not force-create) + cache-exists + use-cache) + (handle-exceptions + exn + #f ;; any issues, just give up with the cached version and re-read + (configf:read-alist cache-file)) + #f)) + (test-full-name (if (and item-path (not (string-null? item-path))) + (conc test-name "/" item-path) + test-name))) + (if cached-dat + cached-dat + (let ((dat (hash-table-ref/default *testconfigs* test-full-name #f))) + (if (and dat ;; have a locally cached version + (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data? 
+ dat + ;; no cached data available + (let* ((treg (or test-registry + (tests:get-all))) + (test-path (or (hash-table-ref/default treg test-name #f) + (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + test-name "/" item-path)) + (local-tcfg (conc local-tcdir "/testconfig"))) + (if (common:file-exists? local-tcfg) + local-tcdir + #f)) + (conc *toppath* "/tests/" test-name))) + (test-configf (conc test-path "/testconfig")) + (testexists (let loopa ((tries-left 30)) + (cond + ( + (and (common:file-exists? test-configf)(file-read-access? test-configf)) + #t) + ( + (common:file-exists? test-configf) + (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf) + #f) + ( + (and wait-a-minute (> tries-left 0)) + (thread-sleep! 10) + (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires + (loopa (sub1 tries-left))) + (else + (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires + #f)))) + (tcfg (if testexists + (configf:read-config test-configf #f system-allowed + environ-patt: (if system-allowed + "pre-launch-env-vars" + #f)) + #f))) + (if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data + (if tcfg (hash-table-set! *testconfigs* test-full-name tcfg)) + (if (and testexists + cache-file + (file-write-access? cache-path) + allow-write-cache) + (let ((tpath (conc cache-path "/.testconfig"))) + (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) + (if (and tcfg (not (common:in-running-test?))) + (configf:write-alist tcfg tpath)))) + tcfg)))))) + +;; sort tests by priority and waiton +;; Move test specific stuff to a test unit FIXME one of these days +(define (tests:sort-by-priority-and-waiton test-records) + (if (eq? 
(hash-table-size test-records) 0) + '() + (let* ((mungepriority (lambda (priority) + (if priority + (let ((tmp (any->number priority))) + (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) + 0))) + (all-tests (hash-table-keys test-records)) + (all-waited-on (let loop ((hed (car all-tests)) + (tal (cdr all-tests)) + (res '())) + (let* ((trec (hash-table-ref test-records hed)) + (waitons (or (tests:testqueue-get-waitons trec) '()))) + (if (null? tal) + (append res waitons) + (loop (car tal)(cdr tal)(append res waitons)))))) + (sort-fn1 + (lambda (a b) + (let* ((a-record (hash-table-ref test-records a)) + (b-record (hash-table-ref test-records b)) + (a-waitons (or (tests:testqueue-get-waitons a-record) '())) + (b-waitons (or (tests:testqueue-get-waitons b-record) '())) + (a-config (tests:testqueue-get-testconfig a-record)) + (b-config (tests:testqueue-get-testconfig b-record)) + (a-raw-pri (configf:lookup a-config "requirements" "priority")) + (b-raw-pri (configf:lookup b-config "requirements" "priority")) + (a-priority (mungepriority a-raw-pri)) + (b-priority (mungepriority b-raw-pri))) + (tests:testqueue-set-priority! a-record a-priority) + (tests:testqueue-set-priority! b-record b-priority) + ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) + (cond + ;; is + ((member a b-waitons) ;; is b waiting on a? + ;; (debug:print 0 *default-log-port* "case1") + #t) + ((member b a-waitons) ;; is a waiting on b? + ;; (debug:print 0 *default-log-port* "case2") + #f) + ((and (not (null? a-waitons)) ;; both have waitons - do not disturb + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case2.1") + #t) + ((and (null? a-waitons) ;; no waitons for a but b has waitons + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case3") + #f) + ((and (not (null? a-waitons)) ;; a has waitons but b does not + (null? 
b-waitons)) + ;; (debug:print 0 *default-log-port* "case4") + #t) + ((not (eq? a-priority b-priority)) ;; use + (> a-priority b-priority)) + (else + ;; (debug:print 0 *default-log-port* "case5") + (string>? a b)))))) + + (sort-fn2 + (lambda (a b) + (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) + (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) + ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) + ;; (debug:print "dot-res=" dot-res)) + ;; (let ((data (map cdr (filter + ;; (lambda (x)(equal? "node" (car x))) + ;; (map string-split (tests:easy-dot test-records "plain")))))) + ;; (map car (sort data (lambda (a b) + ;; (> (string->number (caddr a))(string->number (caddr b))))))) + ;; )) + (sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table + +(define (tests:easy-dot test-records outtype) + (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX")))) + (let ((all-testnames (hash-table-keys test-records)) + (temp-port (open-output-file* fd))) + ;; (format temp-port "This file is ~A.~%" temp-path) + (format temp-port "digraph tests {\n") + (format temp-port " size=4,8\n") + ;; (format temp-port " splines=none\n") + (for-each + (lambda (testname) + (let* ((testrec (hash-table-ref test-records testname)) + (waitons (or (tests:testqueue-get-waitons testrec) '()))) + (for-each + (lambda (waiton) + (format temp-port (conc " " waiton " -> " testname " [splines=ortho]\n"))) + waitons))) + all-testnames) + (format temp-port "}\n") + (close-output-port temp-port) + (with-input-from-pipe + (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path) + (lambda () + (let ((res (read-lines))) + ;; (delete-file temp-path) + res)))))) + +(define (tests:write-dot-file test-records fname sizex sizey) + (if (file-write-access? 
(pathname-directory fname)) + (with-output-to-file fname + (lambda () + (map print (tests:tests->dot test-records sizex sizey)))))) + +(define (tests:tests->dot test-records sizex sizey) + (let ((all-testnames (hash-table-keys test-records))) + (if (null? all-testnames) + '() + (let loop ((hed (car all-testnames)) + (tal (cdr all-testnames)) + (res (list "digraph tests {" + (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";") + " ratio=0.95;" + ))) + (let* ((testrec (hash-table-ref test-records hed)) + (waitons (or (tests:testqueue-get-waitons testrec) '())) + (newres (append res + (if (null? waitons) + (list (conc " \"" hed "\" [shape=box];")) + (map (lambda (waiton) + (conc " \"" waiton "\" -> \"" hed "\" [shape=box];")) + waitons) + )))) + (if (null? tal) + (append newres (list "}")) + (loop (car tal)(cdr tal) newres) + )))))) + +;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain") + +(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats + (let-values (((inp oup pid)(process "env -i PATH=$PATH dot" (list "-T" outtype)))) + (with-output-to-port oup + (lambda () + (map print indat))) + (close-output-port oup) + (let ((res (with-input-from-port inp + (lambda () + (read-lines))))) + (close-input-port inp) + res))) + +;; read data from tmp file or create if not exists +;; if exists regen in background ;; -(define (tests:lookup-itemmap itemmaps testname) - (let ((best-matches (filter (lambda (itemmap) - (tests:match (car itemmap) testname #f)) - itemmaps))) - (if (null? best-matches) - #f - (let ((res (car best-matches))) - ;; (debug:print 0 *default-log-port* "res=" res) - (cond - ((string? res) res) ;;; FIX THE ROOT CAUSE HERE .... - ((null? res) #f) - ((string? (cdr res)) (cdr res)) ;; it is a pair - ((string? 
(cadr res))(cadr res)) ;; it is a list
-           (else cadr res))))))
-
-(define (mt:lazy-read-test-config test-name)
-  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))
-    (if tconf
-        tconf
-        (let ((test-dirs (tests:get-tests-search-path *configdat*)))
-          (let loop ((hed (car test-dirs))
-                     (tal (cdr test-dirs)))
-            ;; Setting MT_LINKTREE here is almost certainly unnecessary.
-            (let ((tconfig-file (conc hed "/" test-name "/testconfig")))
-              (if (and (common:file-exists? tconfig-file)
-                       (file-read-access? tconfig-file))
-                  (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
-                        (old-link-tree (get-environment-variable "MT_LINKTREE")))
-                    (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
-                    (let ((newtcfg (configf:read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
-                      (hash-table-set! *testconfigs* test-name newtcfg)
-                      (if old-link-tree
-                          (setenv "MT_LINKTREE" old-link-tree)
-                          (unsetenv "MT_LINKTREE"))
-                      newtcfg))
-                  (if (null? tal)
-                      (begin
-                        (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name)
-                        #f)
-                      (loop (car tal)(cdr tal))))))))))
+;; Returns dot's output for testrecords as a list of lines, using a cache
+;; file under /tmp named from the current user and (server:mk-signature).
+;;
+;; The dot source is always rewritten first. If the cached output file
+;; already exists its current content is returned and dot is re-run in the
+;; background (trailing "&") to refresh it, so the returned lines may be one
+;; generation old. If it does not exist dot is run synchronously and the
+;; fresh output is returned.
+(define (tests:lazy-dot testrecords outtype sizex sizey)
+  (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot"))
+        (fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat")))
+    (tests:write-dot-file testrecords dfile sizex sizey)
+    (if (common:file-exists? fname)
+        (let ((res (with-input-from-file fname
+                     (lambda ()
+                       (read-lines)))))
+          ;; NOTE(review): the background job writes straight into fname, so
+          ;; a concurrent reader could presumably see a partial file - confirm
+          (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&"))
+          res)
+        (begin
+          (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname))
+          (with-input-from-file fname
+            (lambda ()
+              (read-lines)))))))
+
+
+;;======================================================================
+;; refactoring this block into tests:get-full-data from line 263 of runs.scm
+;;======================================================================
+;; hed is the test name
+;; test-records is a hash of test-name => test record
+;;
+;; Fills test-records with one record vector per test, starting from
+;; test-names and following every "requirements"/"waiton" entry found in the
+;; test configs. Tests whose config cannot be found are reported and skipped.
+;; A test that lists itself as a waiton is reported and the self reference is
+;; dropped.
+;;
+;; Record layout: 0 name, 1 config, 2 waitons, 3 priority, 4 items,
+;; 5 itemsdat (#f), 6 spare (#f).
+;;
+;; Returns test-records, also when test-names is empty. required-tests and
+;; test-names are only updated through set! on the local parameters.
+(define (tests:get-full-data test-names test-records required-tests all-tests-registry)
+  (if (null? test-names)
+      test-records ;; nothing to look up; still hand back the table
+      (let loop ((hed (car test-names))
+                 (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc
+        (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop")
+        ;; don't know item-path at this time, let the testconfig get the top level testconfig
+        (let* ((config  (tests:get-testconfig hed #f all-tests-registry 'return-procs))
+               (waitons (let ((instr (if config
+                                         (configf:lookup config "requirements" "waiton")
+                                         (begin ;; No config means this is a non-existant test
+                                           (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.")
+                                           ""))))
+                          (debug:print-info 8 *default-log-port* "waitons string is " instr)
+                          (string-split (cond
+                                         ((procedure? instr)
+                                          (let ((res (instr)))
+                                            (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed)
+                                            res))
+                                         ((string? instr) instr)
+                                         (else
+                                          ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed)
+                                          ""))))))
+          (if (not config) ;; this is a non-existant test called in a waiton.
+              (if (null? tal)
+                  test-records
+                  (loop (car tal)(cdr tal)))
+              (begin
+                (debug:print-info 8 *default-log-port* "waitons: " waitons)
+                ;; check for hed in waitons => this would be circular, remove it and issue an
+                ;; error
+                (if (member hed waitons)
+                    (begin
+                      (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!")
+                      (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))))
+
+                ;; (items (items:get-items-from-config config)))
+                (if (not (hash-table-ref/default test-records hed #f))
+                    (hash-table-set! test-records
+                                     hed (vector hed     ;; 0
+                                                 config  ;; 1
+                                                 waitons ;; 2
+                                                 (configf:lookup config "requirements" "priority") ;; priority 3
+                                                 (let ((items      (hash-table-ref/default config "items" #f)) ;; items 4
+                                                       (itemstable (hash-table-ref/default config "itemstable" #f)))
+                                                   ;; if either items or items table is a proc return it so test running
+                                                   ;; process can know to call items:get-items-from-config
+                                                   ;; if either is a list and none is a proc go ahead and call get-items
+                                                   ;; otherwise return #f - this is not an iterated test
+                                                   (cond
+                                                    ((procedure? items)
+                                                     (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
+                                                     items) ;; calc later
+                                                    ((procedure? itemstable)
+                                                     (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
+                                                     itemstable) ;; calc later
+                                                    ;; NOTE(review): filter always returns a list and every
+                                                    ;; list, '() included, is true in a cond test. This clause
+                                                    ;; therefore always fires and the two clauses below are
+                                                    ;; never reached. Left as is because callers may rely on
+                                                    ;; 'have-procedure - confirm before changing.
+                                                    ((filter (lambda (x)
+                                                               (let ((val (car x)))
+                                                                 (if (procedure? val) val #f)))
+                                                             (append (if (list? items) items '())
+                                                                     (if (list? itemstable) itemstable '())))
+                                                     'have-procedure)
+                                                    ((or (list? items)(list? itemstable)) ;; calc now
+                                                     (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
+                                                                       " items: " items " itemstable: " itemstable)
+                                                     (items:get-items-from-config config))
+                                                    (else #f))) ;; not iterated
+                                                 #f ;; itemsdat 5
+                                                 #f ;; spare - used for item-path
+                                                 )))
+                (for-each
+                 (lambda (waiton)
+                   (if (and waiton (not (member waiton test-names)))
+                       (begin
+                         (set! required-tests (cons waiton required-tests))
+                         (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
+                 waitons)
+                ;; waitons are put in front of the remaining names so they are visited next
+                (let ((remtests (delete-duplicates (append waitons tal))))
+                  (if (not (null? remtests))
+                      (loop (car remtests)(cdr remtests))
+                      test-records))))))))
+
+;;======================================================================
+;; A R C H I V I N G
+;;======================================================================
+
+;; Stub: ignores its arguments and returns #f.
+(define (test:archive db test-id)
+  #f)
+
+;; Stub: ignores its arguments and returns #f.
+(define (test:archive-tests db keynames target)
+  #f)
 )