Megatest

Diff
Login

Differences From Artifact [6b720dfd33]:

To Artifact [1cfe9c07c7]:


15
16
17
18
19
20
21


22






23

24
25

26
27
28
29
30
31
32

33

34
35
36








37
38
39
40
41
42
43
44
45
46









47


48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63















































































































































































64




65


66
67


68
69
70

71
72
73
74
75
76

77
78
79
80
81
82
83



















































































84
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

31
32

33
34
35
36
37
38
39

40
41
42



43
44
45
46
47
48
49
50
51
52
53
54
55
56




57
58
59
60
61
62
63
64
65
66
67
68
















69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

244
245
246
247
248
249
250


251
252



253






254







255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338







+
+

+
+
+
+
+
+
-
+

-
+






-
+

+
-
-
-
+
+
+
+
+
+
+
+






-
-
-
-
+
+
+
+
+
+
+
+
+

+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+

+
+
-
-
+
+
-
-
-
+
-
-
-
-
-
-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit rmtmod))
(declare (uses debugprint))
;; (declare (uses debugprint.import))
(declare (uses commonmod))
;; (declare (uses commonmod.import))
(declare (uses dbfile))    ;; needed for records
(declare (uses dbmod))
;; (declare (uses tcp-transportmod))
;; (declare (uses tcp-transportmod.import))

(declare (uses apimod))
;; (declare (uses apimod))
;; (declare (uses apimod.import))
(declare (uses ulex))
;; (declare (uses ulex))

;; (include "ulex/ulex.scm")

(module rmtmod
	*
	
(import scheme chicken data-structures extras)
(import scheme chicken data-structures extras matchable srfi-69)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18)
(import commonmod dbfile debugprint) ;; (prefix commonmod cmod:))
(import (prefix commonmod cmod:))
(import apimod)
(import (prefix ulex ulex:))
(import dbmod
	;; tcp-transportmod
	)

;; (import apimod)
;; (import (prefix ulex ulex:))

(include "db_records.scm")

(defstruct alldat
  (areapath #f)
  (ulexdat  #f)
  )

;; return the handle struct for sending queries to a specific database
;;  - initializes the connection object if this is the first access
;;    - finds the "captain" and asks who to talk to for the given dbfname
;;    - establishes the connection to the current dbowner
;; hold the send-receive proc in this parameter
(define rmtmod:send-receive #f) ;; (make-parameter #f))

;;======================================================================
;; M I S C
;;======================================================================

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmtmod:send-receive 'general-call run-id (append (list stmtname run-id) params)))
#;(define (rmt:connect alldat dbfname dbtype)
  (let* ((ulexdat    (or (alldat-ulexdat alldat)
			 (rmt:setup-ulex alldat))))
    (ulex:connect ulexdat dbfname dbtype)))

;; setup the remote calls
#;(define (rmt:setup-ulex alldat)
  (let* ((udata (ulex:setup))) ;; establish connection to ulex
    (alldat-ulexdat-set! alldat udata)
    ;; register all needed procs
    (ulex:register-handler udata 'ping cmod:get-full-version)  ;; override ping with get-full-version
    (ulex:register-handler udata 'login cmod:get-full-version) ;; force setup of the connection
    (ulex:register-handler udata 'execute api:execute-requests)
    udata))

;; set up a connection to the current owner of the dbfile associated with rid



;;======================================================================
;; import an sexpr file into the db
;;======================================================================

(define (rmt:import-sexpr sexpr-file)
  (if (file-exists? sexpr-file)
      (let* ((data (with-input-from-file sexpr-file read)))
	(for-each
	 (lambda (targ-dat)
	   (rmt:import-target targ-dat)) ;; ("target" ("run1" ("data" (1 ("field" . "value") ...
	 data))
      (let* ((msg (conc "ERROR: file "sexpr-file" not found")))
	(debug:print 0 *default-log-port* msg)
	(cons #f msg))))

(define (rmt:import-target targ-dat)
  (let* ((target (car targ-dat))
	 (data   (cdr targ-dat)))
    (for-each
     (lambda (run-dat)
       (rmt:import-run target run-dat)) ;; ("runname" ("data" ("testid" ("field" . "value") ...
     data)))

(define (rmt:import-run target run-dat)
  (let* ((runname    (car run-dat))
	 (all-dat    (cdr run-dat))
	 (tests-data (alist-ref "data" all-dat equal?))
	 (run-meta   (alist-ref "meta" all-dat equal?))
         (run-id     (string->number (alist-ref "id"   run-meta equal?))))

    (rmt:insert-run run-id target runname run-meta)
    (if (list? tests-data)
      (begin
        (debug:print 0 *default-log-port* "import-run: inserting " (length tests-data) " tests")
        (for-each
          (lambda (test-dat)
            (let* ((test-id  (car test-dat))
	      (test-rec (cdr test-dat)))
	      (rmt:insert-test run-id test-rec)))
         tests-data)
      )
      (debug:print 0 *default-log-port* "import-run: run has no tests")
    )
  )
)

;; insert run if not there, return id either way
(define (rmt:insert-run run-id target runname run-meta)
  ;; look for id, return if found
  (let* ((runs (rmtmod:send-receive 'simple-get-runs #f
				    ;;    runpatt count offset target last-update)
				    (list runname #f    #f     target #f))))
    (if (null? runs)
       (begin
        (debug:print 0 *default-log-port* "inserting run for runname " runname " target " target)
	(rmtmod:send-receive 'insert-run #f (list run-id target runname run-meta))
       )
       (begin
        (debug:print 0 *default-log-port* "Found run-id " (simple-run-id (car runs)) " for runname " runname " target " target)
	(simple-run-id (car runs))
       ))))

(define (rmt:insert-test run-id test-rec)
  (let* ((testname  (alist-ref "testname" test-rec equal?))
	 (item-path (alist-ref "item_path" test-rec equal?))
         (test-id (rmt:get-test-id run-id testname item-path))
         )
        (if test-id
          (debug:print 0 *default-log-port* "test "testname"/"item-path " already exists in run-id " run-id)
          (begin
            (debug:print 0 *default-log-port* " Insert test in run "run-id": "testname"/"item-path)
            (rmtmod:send-receive 'insert-test run-id test-rec)
          )
        )
  )
)

;;======================================================================
;;  T E S T S
;;======================================================================

;; Just some syntatic sugar
(define (rmt:register-test run-id test-name item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (number? test-id)
      (rmtmod:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:get-test-state-status-by-id run-id test-id)
  (rmtmod:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmtmod:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
;;   (assert (number? run-id) "FATAL: Run id required.")
;;   (let* ((test-path (if (string? work-area)
;; 			work-area
;; 			(rmt:test-get-rundir-from-test-id run-id test-id))))
;;     (debug:print 3 *default-log-port* "TEST PATH: " test-path)
;;     (open-test-db test-path)))

;; WARNING: This currently bypasses the transaction wrapped writes system
(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))

(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))

(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  (assert (number? run-id) "FATAL: Run id required.")
  ;; (if (number? run-id)
  (rmtmod:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
  ;;    (begin
  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
  ;;	(print-call-chain (current-error-port))
  ;;	'())))

(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))

;; get stuff via synchash 
(define (rmt:synchash-get run-id proc synckey keynum params)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  
;; state and status are extra hints not usually used in the calculation
;;
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))


;;======================================================================
;; Maintenance
;;======================================================================


(define (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)
  (rmtmod:send-receive 'get-toplevels-and-incompletes run-id (list run-id running-deadtime remotehoststart-deadtime)))

(define (rmt:get-status-from-final-status-file run-dir)
  (let ((infile (conc run-dir "/.final-status")))
    ;; first verify we are able to write the output file
    (if (not (file-read-access? infile))
        (begin 
	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
          #f
          )
	(let ((res (with-input-from-file infile read-lines)))
	  (if (null? res)
	      #f
	      (string-split (car res)))))))
  
;; then send the query to that dbfile owner and wait for a response.
;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
;;
;; NOT EASY TO MIGRATE TO db{file,mod}
;;
#;(define (rmt:send-receive alldat cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
  (let* (;; (alldat   *alldat*)
(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
  (let* ((incompleted '())
	 (areapath (alldat-areapath alldat))
	 (dbtype   (if (or (not rid)(< rid 1)) ;; this is the criteria for "main.db"
		       "main" "runs"))
	 (oldlaunched '())
	 (dbfname  (if (equal? dbtype "main")
		       "main.db"
		       (conc rid ".db")))
	 (dbfile   (conc areapath "/.db/" dbfname))
	 (ulexconn (rmt:connect alldat dbfname dbtype))  ;; ulexconn is our new *runremote*, it is a dbowner struct < pdat lastrefresh >
	 (udata    (alldat-ulexdat alldat)))
	 (toplevels   '())
    	(ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid params)))
    ;; need to call this on the other side 
    ;; (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
    
    #;(with-input-from-string
	(ulex:remote-request udata ulexconn 'immediate dbfile 'execute rid (with-output-to-string (lambda ()(serialize params))))
       (lambda ()(deserialize)))
          ;; The default running-deadtime is 720 seconds = 12 minutes.
          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
         (deadtime-trim (or ovr-deadtime cfg-deadtime))
         (server-start-allowance 200)
         (server-overloaded-budget 200)
         (launch-monitor-off-time (or test-stats-update-period 30))
         (launch-monitor-on-time-budget 30)
         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)

    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)

    (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)))
      (set! oldlaunched (list-ref dat 1))
      (set! toplevels   (list-ref dat 2))
      (set! incompleted (list-ref dat 0)))

    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
		      (length toplevels) " old LAUNCHED toplevel tests and "
		      (length incompleted) " tests marked RUNNING but apparently dead.")
  
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
    ;;
    ;; (db:delay-if-busy dbdat)
    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
      (if (> (length all-ids) 0)
	  (begin
	    ;; (launch:is-test-alive "localhost" 435)
	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
			 " as DEAD")
	    (for-each
             (lambda (test-id)
               (let* ((tinfo   (rmt:get-test-info-by-id run-id test-id))
		      (run-dir (db:test-get-rundir     tinfo))
		      (host    (db:test-get-host       tinfo))
		      (pid     (db:test-get-process_id tinfo))
		      (result (rmt:get-status-from-final-status-file run-dir)))
		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
		     (begin
		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
		       (rmt:set-state-status-and-roll-up-items
			run-id test-id 'foo "COMPLETED" "PASS"
			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
					  (commonmod:is-test-alive host pid))))
		       (if is-alive
			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
					" has a process on pid " pid ", NOT setting to DEAD.")
			   (begin
			     (debug:print 0 *default-log-port* "INFO: test " test-id
					  " final state/status is not COMPLETED/PASS. It is " result)
			     (rmt:set-state-status-and-roll-up-items
			      run-id test-id 'foo "COMPLETED" "DEAD"
			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
	     ;; call end of eud of run detection for posthook - from merge, is it needed?
	     ;; (launch:end-of-run-check run-id)
	     all-ids)
	    )))))

;;======================================================================
;; Misc
;;======================================================================

;; (define (rmtmod:wait-on-server-load run-id ttdat)
;;   (let* ((dbfname                 (dbmod:run-id->dbfname run-id))
;; 	 (get-lowest-thread-load
;; 	  (lambda ()
;; 	    (let* ((sdats (tt:get-server-info-sorted ttdat dbfname)))
;; 	      (car (map tt:get-server-threads sdats))))))
;;     (if ttdat
;; 	(let loop ()
;; 	  (if (> (get-lowest-thread-load) 5) ;; load is pretty high
;; 	      (begin
;; 		(debug:print 0 *default-log-port* "Servers appear overloaded, waiting...")
;; 		(thread-sleep! 1)
;; 		(loop))))
;; 	(debug:print 0 *default-log-port* "Can't wait on server load, *ttdat* not set"))))

)