Megatest

Diff
Login

Differences From Artifact [7e65ac64d4]:

To Artifact [05ec7cefc2]:


30
31
32
33
34
35
36









37
38
39
40
41
42
43
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52







+
+
+
+
+
+
+
+
+








(declare (unit launch))
(declare (uses subrun))
(declare (uses common))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))

(declare (uses commonmod))
(import commonmod)

(declare (uses configfmod))
(import configfmod)

(declare (uses dbmod))
(import dbmod)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")

;;======================================================================
166
167
168
169
170
171
172

173



174
175
176
177


178
179
180
181
182
183
184


185
186
187
188
189
190
191
192
193
194
195
196


197
198
199
200
201
202
203
175
176
177
178
179
180
181
182

183
184
185
186
187
188
189
190
191
192
193
194
195
196
197

198
199
200
201
202
203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
218
219







+
-
+
+
+




+
+






-
+
+











-
+
+







              (set! ezsteps #t) ;; set the needed flag
	      (set! ezstepslst
                    (append (or ezstepslst '())
                            (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))

	;; process the ezsteps
	(if ezsteps
	    (let* ((envdbf        (conc "/tmp/."(current-user-name)"-"(current-process-id)"-"run-id"-"test-id".db"))
	    (let* ((all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; where  'params is the params list (add other stuff as needed)
		   (all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist;
	                                              ;;; where  'params is the params list (add other
	                                              ;;; stuff as needed)
	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
	      (if (not (> (length ezstepslst) 0))
		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
		  (let ((all-step-names (map car ezstepslst)))
		    (setenv "MT_STEP_NAMES" (string-intersperse all-step-names " "))
		  (let loop ((ezstep (car ezstepslst))
			     (tal    (cdr ezstepslst))
			     (prevstep #f))
                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
		    ;; check exit-info (vector-ref exit-info 1)
		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
			(let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat))
			  (let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m
							      tal testconfig all-steps-dat prevstep envdbf))
			       (stepname    (car ezstep))
			       (stepparms   (hash-table-ref all-steps-dat stepname)))
			  (setenv "MT_STEP_NAME" stepname)
			  (pp (hash-table->alist all-steps-dat))
			  ;; if logpro-used read in the stepname.dat file
			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
			      (launch:load-logpro-dat run-id test-id stepname))
			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms)
			      (if (not (null? tal))
				  (loop (car tal) (cdr tal) stepname))
			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
			  (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))
		      ))))))))

(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
         (start-seconds (current-seconds))
	 (calc-minutes  (lambda ()
			  (inexact->exact 
			   (round 
252
253
254
255
256
257
258
259

260
261
262
263
264
265
266
268
269
270
271
272
273
274

275
276
277
278
279
280
281
282







-
+







        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          ;; (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
          ;;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
	  )
        
	(if kill-job? 
	    (begin
              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
784
785
786
787
788
789
790










791
792
793
794
795
796
797







-
-
-
-
-
-
-
-
-
-







			     (tal    (cdr not-completed-tests)))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11)))
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
              (if (not (null? tal))
				  (loop (car tal) (cdr tal)))))))))))        
 
(define (launch:is-test-alive host pid)
  (if (and host pid (not (equal? host "n/a")))
      (let* ((cmd (conc "ssh " host " pstree -A " pid))
	     (output (with-input-from-pipe cmd read-lines)))
	(debug:print 2 *default-log-port* "Running " cmd " received " output)
	(if (eq? (length output) 0)
	   #f
	   #t))
      #t))
 
(define (launch:kill-tests-if-dead run-id)
  (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (let loop ((running-test (car running-tests))
			     (tal    (cdr running-tests))
			     (kill-cnt 0))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11))
924
925
926
927
928
929
930

931
932
933
934
935
936
937
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944







+







	       mtcachef  rccachef
	       use-cache
	       (get-environment-variable "MT_RUN_AREA_HOME")
	       (common:file-exists? mtcachef)
	       (common:file-exists? rccachef))
          ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
          (set! *configdat*    (configf:read-alist mtcachef))
	  (set! *db-keys*      (common:get-fields *configdat*))
          ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
	  (set! *runconfigdat* (configf:read-alist rccachef))
	  (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
	  (set! *configstatus* 'fulldata)
	  (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
	  *toppath*)
	 ;; there are no existing cached configs, do full reads of the configs and cache them
960
961
962
963
964
965
966

967
968
969
970
971
972
973
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981







+







		(begin
                  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
		  (set! *configdat*  (car first-pass))
                  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
		  (set! *configinfo* first-pass)
		  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
		  (set! toppath      *toppath*)
		  (set! *db-keys*    (common:get-fields *configdat*))
		  (if (not *toppath*)
		      (begin
			(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
			(exit 1)))
		  (setenv "MT_RUN_AREA_HOME" *toppath*)
		  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
		  (let* ((keys         (common:list-or-null (rmt:get-keys)
1023
1024
1025
1026
1027
1028
1029

1030
1031
1032
1033
1034
1035
1036
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045







+








            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
		(let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
		       (rdat     (read-config (conc toppath  ;; convert this to use runconfig:read!
						    "/runconfigs.config") *runconfigdat* #t sections: sections)))
		  (set! *configinfo*   cfgdat)
		  (set! *configdat*    (car cfgdat))
		  (set! *db-keys*    (common:get-fields *configdat*))
		  (set! *runconfigdat* rdat)
		  (set! *toppath*      toppath)
		  (set! *configstatus* 'partial))
		(begin
		  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
		  (exit 2))))))
	;; COND ends here.