Megatest

rmtmod.scm at [5a118f1a51]
Login

File rmtmod.scm artifact 1cfe9c07c7 part of check-in 5a118f1a51


;;======================================================================
;; Copyright 2017, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit rmtmod))
(declare (uses debugprint))
;; (declare (uses debugprint.import))
(declare (uses commonmod))
;; (declare (uses commonmod.import))
(declare (uses dbfile))    ;; needed for records
(declare (uses dbmod))
;; (declare (uses tcp-transportmod))
;; (declare (uses tcp-transportmod.import))

;; (declare (uses apimod))
;; (declare (uses apimod.import))
;; (declare (uses ulex))

;; (include "ulex/ulex.scm")

(module rmtmod
	*
	
(import scheme chicken data-structures extras matchable srfi-69)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18)
(import commonmod dbfile debugprint) ;; (prefix commonmod cmod:))
(import dbmod
	;; tcp-transportmod
	)

;; (import apimod)
;; (import (prefix ulex ulex:))

(include "db_records.scm")

(defstruct alldat
  (areapath #f)
  (ulexdat  #f)
  )

;; hold the send-receive proc in this parameter
(define rmtmod:send-receive #f) ;; (make-parameter #f))

;;======================================================================
;; M I S C
;;======================================================================

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmtmod:send-receive 'general-call run-id (append (list stmtname run-id) params)))



;;======================================================================
;; import an sexpr file into the db
;;======================================================================

(define (rmt:import-sexpr sexpr-file)
  (if (file-exists? sexpr-file)
      (let* ((data (with-input-from-file sexpr-file read)))
	(for-each
	 (lambda (targ-dat)
	   (rmt:import-target targ-dat)) ;; ("target" ("run1" ("data" (1 ("field" . "value") ...
	 data))
      (let* ((msg (conc "ERROR: file "sexpr-file" not found")))
	(debug:print 0 *default-log-port* msg)
	(cons #f msg))))

(define (rmt:import-target targ-dat)
  (let* ((target (car targ-dat))
	 (data   (cdr targ-dat)))
    (for-each
     (lambda (run-dat)
       (rmt:import-run target run-dat)) ;; ("runname" ("data" ("testid" ("field" . "value") ...
     data)))

(define (rmt:import-run target run-dat)
  (let* ((runname    (car run-dat))
	 (all-dat    (cdr run-dat))
	 (tests-data (alist-ref "data" all-dat equal?))
	 (run-meta   (alist-ref "meta" all-dat equal?))
         (run-id     (string->number (alist-ref "id"   run-meta equal?))))

    (rmt:insert-run run-id target runname run-meta)
    (if (list? tests-data)
      (begin
        (debug:print 0 *default-log-port* "import-run: inserting " (length tests-data) " tests")
        (for-each
          (lambda (test-dat)
            (let* ((test-id  (car test-dat))
	      (test-rec (cdr test-dat)))
	      (rmt:insert-test run-id test-rec)))
         tests-data)
      )
      (debug:print 0 *default-log-port* "import-run: run has no tests")
    )
  )
)

;; insert run if not there, return id either way
(define (rmt:insert-run run-id target runname run-meta)
  ;; look for id, return if found
  (let* ((runs (rmtmod:send-receive 'simple-get-runs #f
				    ;;    runpatt count offset target last-update)
				    (list runname #f    #f     target #f))))
    (if (null? runs)
       (begin
        (debug:print 0 *default-log-port* "inserting run for runname " runname " target " target)
	(rmtmod:send-receive 'insert-run #f (list run-id target runname run-meta))
       )
       (begin
        (debug:print 0 *default-log-port* "Found run-id " (simple-run-id (car runs)) " for runname " runname " target " target)
	(simple-run-id (car runs))
       ))))

(define (rmt:insert-test run-id test-rec)
  (let* ((testname  (alist-ref "testname" test-rec equal?))
	 (item-path (alist-ref "item_path" test-rec equal?))
         (test-id (rmt:get-test-id run-id testname item-path))
         )
        (if test-id
          (debug:print 0 *default-log-port* "test "testname"/"item-path " already exists in run-id " run-id)
          (begin
            (debug:print 0 *default-log-port* " Insert test in run "run-id": "testname"/"item-path)
            (rmtmod:send-receive 'insert-test run-id test-rec)
          )
        )
  )
)

;;======================================================================
;;  T E S T S
;;======================================================================

;; Just some syntatic sugar
(define (rmt:register-test run-id test-name item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (number? test-id)
      (rmtmod:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:get-test-state-status-by-id run-id test-id)
  (rmtmod:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmtmod:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
;;   (assert (number? run-id) "FATAL: Run id required.")
;;   (let* ((test-path (if (string? work-area)
;; 			work-area
;; 			(rmt:test-get-rundir-from-test-id run-id test-id))))
;;     (debug:print 3 *default-log-port* "TEST PATH: " test-path)
;;     (open-test-db test-path)))

;; WARNING: This currently bypasses the transaction wrapped writes system
(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))

(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))

(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  (assert (number? run-id) "FATAL: Run id required.")
  ;; (if (number? run-id)
  (rmtmod:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
  ;;    (begin
  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
  ;;	(print-call-chain (current-error-port))
  ;;	'())))

(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))

;; get stuff via synchash 
(define (rmt:synchash-get run-id proc synckey keynum params)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  
;; state and status are extra hints not usually used in the calculation
;;
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))


;;======================================================================
;; Maintenance
;;======================================================================


(define (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)
  (rmtmod:send-receive 'get-toplevels-and-incompletes run-id (list run-id running-deadtime remotehoststart-deadtime)))

(define (rmt:get-status-from-final-status-file run-dir)
  (let ((infile (conc run-dir "/.final-status")))
    ;; first verify we are able to write the output file
    (if (not (file-read-access? infile))
        (begin 
	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
          #f
          )
	(let ((res (with-input-from-file infile read-lines)))
	  (if (null? res)
	      #f
	      (string-split (car res)))))))
  
;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
;;
;; NOT EASY TO MIGRATE TO db{file,mod}
;;
(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
          ;; The default running-deadtime is 720 seconds = 12 minutes.
          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
         (deadtime-trim (or ovr-deadtime cfg-deadtime))
         (server-start-allowance 200)
         (server-overloaded-budget 200)
         (launch-monitor-off-time (or test-stats-update-period 30))
         (launch-monitor-on-time-budget 30)
         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)

    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)

    (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)))
      (set! oldlaunched (list-ref dat 1))
      (set! toplevels   (list-ref dat 2))
      (set! incompleted (list-ref dat 0)))

    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
		      (length toplevels) " old LAUNCHED toplevel tests and "
		      (length incompleted) " tests marked RUNNING but apparently dead.")
  
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
    ;;
    ;; (db:delay-if-busy dbdat)
    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
      (if (> (length all-ids) 0)
	  (begin
	    ;; (launch:is-test-alive "localhost" 435)
	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
			 " as DEAD")
	    (for-each
             (lambda (test-id)
               (let* ((tinfo   (rmt:get-test-info-by-id run-id test-id))
		      (run-dir (db:test-get-rundir     tinfo))
		      (host    (db:test-get-host       tinfo))
		      (pid     (db:test-get-process_id tinfo))
		      (result (rmt:get-status-from-final-status-file run-dir)))
		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
		     (begin
		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
		       (rmt:set-state-status-and-roll-up-items
			run-id test-id 'foo "COMPLETED" "PASS"
			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
					  (commonmod:is-test-alive host pid))))
		       (if is-alive
			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
					" has a process on pid " pid ", NOT setting to DEAD.")
			   (begin
			     (debug:print 0 *default-log-port* "INFO: test " test-id
					  " final state/status is not COMPLETED/PASS. It is " result)
			     (rmt:set-state-status-and-roll-up-items
			      run-id test-id 'foo "COMPLETED" "DEAD"
			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
	     ;; call end of eud of run detection for posthook - from merge, is it needed?
	     ;; (launch:end-of-run-check run-id)
	     all-ids)
	    )))))

;;======================================================================
;; Misc
;;======================================================================

;; (define (rmtmod:wait-on-server-load run-id ttdat)
;;   (let* ((dbfname                 (dbmod:run-id->dbfname run-id))
;; 	 (get-lowest-thread-load
;; 	  (lambda ()
;; 	    (let* ((sdats (tt:get-server-info-sorted ttdat dbfname)))
;; 	      (car (map tt:get-server-threads sdats))))))
;;     (if ttdat
;; 	(let loop ()
;; 	  (if (> (get-lowest-thread-load) 5) ;; load is pretty high
;; 	      (begin
;; 		(debug:print 0 *default-log-port* "Servers appear overloaded, waiting...")
;; 		(thread-sleep! 1)
;; 		(loop))))
;; 	(debug:print 0 *default-log-port* "Can't wait on server load, *ttdat* not set"))))

)