Megatest

Diff
Login

Differences From Artifact [228baa4ee9]:

To Artifact [a8ad7abd36]:


61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
61
62
63
64
65
66
67

68
69
70
71
72
73
74







-







	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (fullrunscript (if runscript (conc testpath "/" runscript) #f))
	       (db        #f)
	       (rollup-status 0))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
89
90
91
92
93
94
95


96
97
98

99
100
101
102
103

104

105

106
107
108

109
110
111
112


113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
88
89
90
91
92
93
94
95
96
97
98

99





100
101
102

103
104
105

106
107
108


109
110
111
112
113
114
115
116
117
118
119





120
121
122
123
124
125
126







+
+


-
+
-
-
-
-
-
+

+
-
+


-
+


-
-
+
+









-
-
-
-
-







	  (setenv "MT_MEGATEST"  megatest)
	  (setenv "MT_TARGET"    target)
	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
	  (change-directory top-path)
	  (if (not (setup-for-run))
	      (begin
		(debug:print 0 "Failed to setup, exiting") 
		;; (sqlite3:finalize! db)
		;; (sqlite3:finalize! tdb)
		(exit 1)))
	  (change-directory *toppath*)
	  ;; now can find our db

	  (set! db (open-db))
	  (if (not (args:get-arg "-server"))
	      (server:client-setup db))
	  ;; (set! *cache-on* #t)
	  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
	  (open-run-close-measure set-megatest-env-vars #f run-id) ;; these may be needed by the launching process
	  (change-directory work-area) 

	  (set-run-config-vars db run-id)
	  (open-run-close set-run-config-vars #f run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (open-run-close set-megatest-env-vars #f run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db run-id test-name itemdat)
	  (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0)
	  (open-run-close test-set-status! #f test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  ;; from here on out we will open and close the db
	  ;; on every access to reduce the probablitiy of 
	  ;; contention or stuck access on nfs.
	  (sqlite3:finalize! db)

	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
150
151
152
153
154
155
156
157

158
159
160
161
162
163
164
165
166
167
143
144
145
146
147
148
149

150



151
152
153
154
155
156
157







-
+
-
-
-







						(thread-sleep! 2)
						(loop (+ i 1)))
					      )))))
				 ;; then, if runscript ran ok (or did not get called)
				 ;; do all the ezsteps (if any)
				 (if ezsteps
				     (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
					    (ezstepslst (hash-table-ref/default testconfig "ezsteps" '()))
					    (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())))
					    (db         (open-db)))
				       (if (not (args:get-arg "-server"))
					   (server:client-setup db))
				       (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps"))
				       ;; if ezsteps was defined then we are sure to have at least one step but check anyway
				       (if (not (> (length ezstepslst) 0))
					   (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length")
					   (let loop ((ezstep (car ezstepslst))
						      (tal    (cdr ezstepslst))
						      (prevstep #f))
187
188
189
190
191
192
193
194

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212

213
214

215
216
217
218
219
220
221
177
178
179
180
181
182
183

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201

202
203

204
205
206
207
208
209
210
211







-
+

















-
+

-
+







						   ;;       (set! script (conc script "source " prev-env))))
						   
						   ;; call the command using mt_ezstep
						   (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd))

						   (debug:print 4 "script: " script)

						   (rdb:teststep-set-status! db test-id stepname "start" "-" itemdat #f #f)
						   (open-run-close db:teststep-set-status! #f test-id stepname "start" "-" itemdat #f #f)
						   ;; now launch
						   (let ((pid (process-run script)))
						     (let processloop ((i 0))
						       (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
								   (mutex-lock! m)
								   (vector-set! exit-info 0 pid)
								   (vector-set! exit-info 1 exit-status)
								   (vector-set! exit-info 2 exit-code)
								   (mutex-unlock! m)
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   ))
                                                     (let ((exinfo (vector-ref exit-info 2))
                                                           (logfna (if logpro-used (conc stepname ".html") "")))
                                                        ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
						        (rdb:teststep-set-status! db test-id stepname "end" exinfo itemdat #f logfna))
						        (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo itemdat #f logfna))
						     (if logpro-used
							 (rdb:test-set-log! db test-id (conc stepname ".html")))
							 (open-run-close db:test-set-log! #f test-id (conc stepname ".html")))
						     ;; set the test final status
						     (let* ((this-step-status (cond
									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									       ((eq? (vector-ref exit-info 2) 0)                   'pass)
									       (else 'fail)))
							    (overall-status   (cond
									       ((eq? rollup-status 2) 'warn)
229
230
231
232
233
234
235
236

237
238
239
240

241
242
243

244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268












269
270
271
272
273
274
275
219
220
221
222
223
224
225

226
227
228
229

230
231
232

233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248










249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267







-
+



-
+


-
+















-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+







						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (test-set-status! db test-id "RUNNING" "WARN" 
							  (open-run-close test-set-status! #f test-id "RUNNING" "WARN" 
									    (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									    #f))
							 ((pass)
							  (test-set-status! db test-id "RUNNING" "PASS" #f #f))
							  (open-run-close test-set-status! #f test-id "RUNNING" "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (test-set-status! db test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  (open-run-close test-set-status! #f test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
					     (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (let loop ((minutes   (calc-minutes)))
				     (let* ((db       (open-db))
					    (cpuload  (get-cpu-load))
					    (diskfree (get-df (current-directory)))
					    (tmpfree  (get-df "/tmp")))
				       (if (not (args:get-arg "-server"))
					   (server:client-setup db))
				       (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				       (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
				       (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				     ;; (let* (;; (db       (open-db))
					    ;; (cpuload  (get-cpu-load))
					    ;; (diskfree (get-df (current-directory)))
					    ;; (tmpfree  (get-df "/tmp")))
				     (begin
				       ;; (if (not (args:get-arg "-server"))
				       ;;	   (server:client-setup db))
				       ;; (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       ;; (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				       (set! kill-job? (open-run-close test-get-kill-request #f test-id)) ;; run-id test-name itemdat))
				       (open-run-close test-set-meta-info #f test-id run-id test-name itemdat minutes)
				       ;; (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
284
285
286
287
288
289
290
291

292
293

294
295
296
297

298
299
300
301
302
303
304
305
306
307
308
309



310
311

312
313
314
315

316
317
318
319
320
321
322
276
277
278
279
280
281
282

283
284

285
286
287
288

289
290
291
292
293
294
295
296
297
298



299
300
301
302

303
304
305
306

307
308
309
310
311
312
313
314







-
+

-
+



-
+









-
-
-
+
+
+

-
+



-
+







								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db test-id "KILLED"  "FAIL"
						     (open-run-close test-set-status! #f test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       (sqlite3:finalize! db)
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (set! db (open-db))
	    (if (not (args:get-arg "-server"))
		(server:client-setup db))
	    ;; (set! db (open-db))
	    ;; (if (not (args:get-arg "-server"))
	    ;;	(server:client-setup db))
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (rdb:get-test-info db run-id test-name item-path)))
		   (testinfo  (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (test-set-status! db test-id 
		    (open-run-close test-set-status! #f test-id 
				      (if kill-job? "KILLED" "COMPLETED")
				      ;; Old logic:
				      ;; (if (vector-ref exit-info 1) ;; look at the exit-status, #t means it at least ran
				      ;;     (if (and (not kill-job?) 
				      ;;         (eq? (vector-ref exit-info 2) 0)) ;; we can now use rollup-status instead
				      ;;         "PASS"
				      ;;         "FAIL")
331
332
333
334
335
336
337
338

339
340
341
342
343
344
345


346
347
348
349
350
351
352
323
324
325
326
327
328
329

330
331
332
333
334
335
336

337
338
339
340
341
342
343
344
345







-
+






-
+
+







				       ((eq? rollup-status 2)
					;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
					(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				       (else "FAIL"))
				      (args:get-arg "-m") #f)))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items db run-id test-name #f)) ;; don't force - just update if no
		  (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (sqlite3:finalize! db)
	    ;; (sqlite3:finalize! db)
	    ;; (sqlite3:finalize! tdb)
	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (setup-for-run)
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 
399
400
401
402
403
404
405
406

407
408
409
410
411
412
413
392
393
394
395
396
397
398

399
400
401
402
403
404
405
406







-
+







;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area db run-id test-src-path disk-path testname itemdat)
(define (create-work-area db run-id test-id test-src-path disk-path testname itemdat)
  (let* ((run-info (db:get-run-info db run-id))
	 (item-path (item-list->path itemdat))
	 (runname  (db:get-value-by-header (db:get-row run-info)
					   (db:get-header run-info)
					   "runname"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (key-vals (db:get-key-vals db run-id))
447
448
449
450
451
452
453
454

455
456
457
458
459
460
461
440
441
442
443
444
445
446

447
448
449
450
451
452
453
454







-
+







    ;; This wass highly inefficient, one db write for every subtest, potentially
    ;; thousands of unnecessary updates, cache the fact it was set and don't set it 
    ;; again. 

    ;; NB - This is not working right - some top tests are not getting the path set!!!

    (if (not (hash-table-ref/default *toptest-paths* testname #f))
	(let* ((testinfo       (db:get-test-info db run-id testname item-path))
	(let* ((testinfo       (db:get-test-info-by-id db test-id)) ;;  run-id testname item-path))
	       (curr-test-path (if testinfo (db:test-get-rundir testinfo) #f)))
	  (hash-table-set! *toptest-paths* testname curr-test-path)
	  (db:test-set-rundir! db run-id testname "" lnkpath) ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print 2 "INFO: Creating " toptest-path " and link " lnkpath)
547
548
549
550
551
552
553
554
555


556
557
558
559
560
561
562
563
564
565
566

567
568
569
570
571
572
573
540
541
542
543
544
545
546


547
548
549
550
551
552
553
554
555
556
557
558

559
560
561
562
563
564
565
566







-
-
+
+










-
+







	 (work-area  #f)
	 (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	 (diskpath   #f)
	 (cmdparms   #f)
	 (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	 (mt-bindir-path #f)
	 (item-path (item-list->path itemdat))
	 (testinfo   (rdb:get-test-info db run-id test-name item-path))
	 (test-id    (db:test-get-id testinfo))
	 (test-id    (open-run-close db:get-test-id db run-id test-name item-path))
	 (testinfo   (open-run-close db:get-test-info-by-id db test-id))
	 (mt_target  (string-intersperse (map cadr keyvallst) "/"))
	 (debug-param (if (args:get-arg "-debug")(list "-debug" (args:get-arg "-debug")) '())))
    (if hosts (set! hosts (string-split hosts)))
    ;; set the megatest to be called on the remote host
    (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (set! diskpath (get-best-disk *configdat*))
    (if diskpath
	(let ((dat  (create-work-area db run-id test-path diskpath test-name itemdat)))
	(let ((dat  (open-run-close create-work-area db run-id test-id test-path diskpath test-name itemdat)))
	  (set! work-area (car dat))
	  (set! toptest-work-area (cadr dat))
	  (debug:print 2 "INFO: Using work area " work-area))
	(begin
	  (set! work-area (conc test-path "/tmp_run"))
	  (create-directory work-area #t)
	  (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))
585
586
587
588
589
590
591

592

593

594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
578
579
580
581
582
583
584
585

586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601

602
603
604
605
606
607
608







+
-
+

+













-







							  (list 'ezsteps   ezsteps) 
							  (list 'target    mt_target)
							  (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
							  (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
							  (list 'runname   runname)
							  (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
    ;; clean out step records from previous run if they exist
    (debug:print 4 "INFO: FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    (db:delete-test-step-records db run-id test-name itemdat)
    (open-run-close db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (open-run-close test-set-status! db test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
     (else
      (if (not useshell)(debug:print 0 "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
      (set! fullcmd (append (list remote-megatest test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
    ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
    (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
    (debug:print 1 "Launching " work-area)
    (test-set-status! db test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
    (debug:print 4 "fullcmd: " fullcmd)
    (let* ((commonprevvals (alist->env-vars
			    (hash-table-ref/default *configdat* "env-override" '())))
	   (testprevvals   (alist->env-vars
			    (hash-table-ref/default test-conf "pre-launch-env-overrides" '())))
	   (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
631
632
633
634
635
636
637
638

639
640
641
642
643
644
645
646
647
648
649
650
625
626
627
628
629
630
631

632
633
634
635
636
637
638
639
640
641
642
643
644







-
+












	(lambda ()
	  (apply print launch-results)))
      (debug:print 2 "Launching completed, updating db")
      (debug:print 2 "Launch results: " launch-results)
      (if (not launch-results)
	  (begin
	    (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
	    (sqlite3:finalize! db)
	    ;; (sqlite3:finalize! db)
	    ;; good ole "exit" seems not to work
	    ;; (_exit 9)
	    ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
	    ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	    (process-signal (current-process-id) signal/kill)
	    ))
      (alist->env-vars miscprevvals)
      (alist->env-vars testprevvals)
      (alist->env-vars commonprevvals)
      launch-results))
  (change-directory *toppath*))