Megatest

Diff
Login

Differences From Artifact [228baa4ee9]:

To Artifact [908cebfd40]:


62
63
64
65
66
67
68

69
70
71
72
73
74
75
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76







+







	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (fullrunscript (if runscript (conc testpath "/" runscript) #f))
	       (db        #f)
	       (tdb       #f)
	       (rollup-status 0))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
89
90
91
92
93
94
95


96
97
98
99
100
101
102
103
104





105
106
107
108
109
110
111

112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

119
120
121
122
123
124
125
126
127
128
129





130
131
132
133
134
135
136







+
+









+
+
+
+
+






-
+










-
-
-
-
-







	  (setenv "MT_MEGATEST"  megatest)
	  (setenv "MT_TARGET"    target)
	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
	  (change-directory top-path)
	  (if (not (setup-for-run))
	      (begin
		(debug:print 0 "Failed to setup, exiting") 
		;; (sqlite3:finalize! db)
		;; (sqlite3:finalize! tdb)
		(exit 1)))
	  (change-directory *toppath*)
	  ;; now can find our db
	  (set! db (open-db))
	  (if (not (args:get-arg "-server"))
	      (server:client-setup db))
	  ;; (set! *cache-on* #t)
	  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
	  (change-directory work-area) 

	  (on-exit (lambda ()
		     (debug:print 0 "Finalizing db!!!")
		     (sqlite3:finalize! db)))

	  (set-run-config-vars db run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db run-id test-name itemdat)
	  (test-set-meta-info db test-id run-id test-name itemdat)
	  (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  ;; from here on out we will open and close the db
	  ;; on every access to reduce the probablitiy of 
	  ;; contention or stuck access on nfs.
	  (sqlite3:finalize! db)

	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268












269
270
271
272
273
274
275
255
256
257
258
259
260
261










262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280







-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+







							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (let loop ((minutes   (calc-minutes)))
				     (let* ((db       (open-db))
					    (cpuload  (get-cpu-load))
					    (diskfree (get-df (current-directory)))
					    (tmpfree  (get-df "/tmp")))
				       (if (not (args:get-arg "-server"))
					   (server:client-setup db))
				       (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				       (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
				       (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				     ;; (let* (;; (db       (open-db))
					    ;; (cpuload  (get-cpu-load))
					    ;; (diskfree (get-df (current-directory)))
					    ;; (tmpfree  (get-df "/tmp")))
				     (begin
				       ;; (if (not (args:get-arg "-server"))
				       ;;	   (server:client-setup db))
				       ;; (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       ;; (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				       (set! kill-job? (test-get-kill-request db test-id)) ;; run-id test-name itemdat))
				       (test-set-meta-info db test-id run-id test-name itemdat minutes: minutes)
				       ;; (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
287
288
289
290
291
292
293

294
295
296
297

298
299
300
301
302
303
304
305
306
307
308
309



310
311

312
313
314
315
316
317
318
292
293
294
295
296
297
298
299
300
301
302

303
304
305
306
307
308
309
310
311
312



313
314
315
316

317
318
319
320
321
322
323
324







+



-
+









-
-
-
+
+
+

-
+







							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       (sqlite3:finalize! db)
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (set! db (open-db))
	    (if (not (args:get-arg "-server"))
		(server:client-setup db))
	    ;; (set! db (open-db))
	    ;; (if (not (args:get-arg "-server"))
	    ;;	(server:client-setup db))
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (rdb:get-test-info db run-id test-name item-path)))
		   (testinfo  (db:get-test-info-by-id db test-id))) ;; )) ;; run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (test-set-status! db test-id 
				      (if kill-job? "KILLED" "COMPLETED")
				      ;; Old logic:
				      ;; (if (vector-ref exit-info 1) ;; look at the exit-status, #t means it at least ran
339
340
341
342
343
344
345

346
347
348
349
350
351
352
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359







+







	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (sqlite3:finalize! db)
	    (sqlite3:finalize! tdb)
	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (setup-for-run)
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 
399
400
401
402
403
404
405
406

407
408
409
410
411
412
413
406
407
408
409
410
411
412

413
414
415
416
417
418
419
420







-
+







;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area db run-id test-src-path disk-path testname itemdat)
(define (create-work-area db run-id test-id test-src-path disk-path testname itemdat)
  (let* ((run-info (db:get-run-info db run-id))
	 (item-path (item-list->path itemdat))
	 (runname  (db:get-value-by-header (db:get-row run-info)
					   (db:get-header run-info)
					   "runname"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (key-vals (db:get-key-vals db run-id))
447
448
449
450
451
452
453
454

455
456
457
458
459
460
461
454
455
456
457
458
459
460

461
462
463
464
465
466
467
468







-
+







    ;; This wass highly inefficient, one db write for every subtest, potentially
    ;; thousands of unnecessary updates, cache the fact it was set and don't set it 
    ;; again. 

    ;; NB - This is not working right - some top tests are not getting the path set!!!

    (if (not (hash-table-ref/default *toptest-paths* testname #f))
	(let* ((testinfo       (db:get-test-info db run-id testname item-path))
	(let* ((testinfo       (db:get-test-info-by-id db test-id)) ;;  run-id testname item-path))
	       (curr-test-path (if testinfo (db:test-get-rundir testinfo) #f)))
	  (hash-table-set! *toptest-paths* testname curr-test-path)
	  (db:test-set-rundir! db run-id testname "" lnkpath) ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print 2 "INFO: Creating " toptest-path " and link " lnkpath)
547
548
549
550
551
552
553
554
555


556
557
558
559
560
561
562
563
564
565
566

567
568
569
570
571
572
573
554
555
556
557
558
559
560


561
562
563
564
565
566
567
568
569
570
571
572

573
574
575
576
577
578
579
580







-
-
+
+










-
+







	 (work-area  #f)
	 (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	 (diskpath   #f)
	 (cmdparms   #f)
	 (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	 (mt-bindir-path #f)
	 (item-path (item-list->path itemdat))
	 (testinfo   (rdb:get-test-info db run-id test-name item-path))
	 (test-id    (db:test-get-id testinfo))
	 (test-id    (db:get-test-id db run-id test-name item-path))
	 (testinfo   (db:get-test-info-by-id db test-id))
	 (mt_target  (string-intersperse (map cadr keyvallst) "/"))
	 (debug-param (if (args:get-arg "-debug")(list "-debug" (args:get-arg "-debug")) '())))
    (if hosts (set! hosts (string-split hosts)))
    ;; set the megatest to be called on the remote host
    (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (set! diskpath (get-best-disk *configdat*))
    (if diskpath
	(let ((dat  (create-work-area db run-id test-path diskpath test-name itemdat)))
	(let ((dat  (create-work-area db run-id test-id test-path diskpath test-name itemdat)))
	  (set! work-area (car dat))
	  (set! toptest-work-area (cadr dat))
	  (debug:print 2 "INFO: Using work area " work-area))
	(begin
	  (set! work-area (conc test-path "/tmp_run"))
	  (create-directory work-area #t)
	  (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))
585
586
587
588
589
590
591
592

593
594
595
596
597
598
599
592
593
594
595
596
597
598

599
600
601
602
603
604
605
606







-
+







							  (list 'ezsteps   ezsteps) 
							  (list 'target    mt_target)
							  (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
							  (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
							  (list 'runname   runname)
							  (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
    ;; clean out step records from previous run if they exist
    (db:delete-test-step-records db run-id test-name itemdat)
    (db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))