︙ | | | ︙ | |
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
(declare (unit launch))
(declare (uses subrun))
(declare (uses common))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")
;;======================================================================
|
>
>
>
>
>
>
>
>
>
|
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
(declare (unit launch))
(declare (uses subrun))
(declare (uses common))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))
(declare (uses commonmod))
(import commonmod)
(declare (uses configfmod))
(import configfmod)
(declare (uses dbmod))
(import dbmod)
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")
;;======================================================================
|
︙ | | | ︙ | |
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
|
(set! ezsteps #t) ;; set the needed flag
(set! ezstepslst
(append (or ezstepslst '())
(list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))
;; process the ezsteps
(if ezsteps
(let* ((all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist; where 'params is the params list (add other stuff as needed)
(if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
;; if ezsteps was defined then we are sure to have at least one step but check anyway
(if (not (> (length ezstepslst) 0))
(debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
(let loop ((ezstep (car ezstepslst))
(tal (cdr ezstepslst))
(prevstep #f))
(debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
;; check exit-info (vector-ref exit-info 1)
(if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
(let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat))
(stepname (car ezstep))
(stepparms (hash-table-ref all-steps-dat stepname)))
(setenv "MT_STEP_NAME" stepname)
(pp (hash-table->alist all-steps-dat))
;; if logpro-used read in the stepname.dat file
(if (and logpro-used (common:file-exists? (conc stepname ".dat")))
(launch:load-logpro-dat run-id test-id stepname))
(if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms)
(if (not (null? tal))
(loop (car tal) (cdr tal) stepname))
(debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
(let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
(start-seconds (current-seconds))
(calc-minutes (lambda ()
(inexact->exact
(round
|
>
|
>
>
>
>
|
>
|
>
|
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
|
(set! ezsteps #t) ;; set the needed flag
(set! ezstepslst
(append (or ezstepslst '())
(list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))
;; process the ezsteps
(if ezsteps
(let* ((envdbf (conc "/tmp/."(current-user-name)"-"(current-process-id)"-"run-id"-"test-id".db"))
(all-steps-dat (make-hash-table))) ;; keep all the info around as stepname ==> alist;
;;; where 'params is the params list (add other
;;; stuff as needed)
(if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
;; if ezsteps was defined then we are sure to have at least one step but check anyway
(if (not (> (length ezstepslst) 0))
(debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
(let ((all-step-names (map car ezstepslst)))
(setenv "MT_STEP_NAMES" (string-intersperse all-step-names " "))
(let loop ((ezstep (car ezstepslst))
(tal (cdr ezstepslst))
(prevstep #f))
(debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
;; check exit-info (vector-ref exit-info 1)
(if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
(let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m
tal testconfig all-steps-dat prevstep envdbf))
(stepname (car ezstep))
(stepparms (hash-table-ref all-steps-dat stepname)))
(setenv "MT_STEP_NAME" stepname)
(pp (hash-table->alist all-steps-dat))
;; if logpro-used read in the stepname.dat file
(if (and logpro-used (common:file-exists? (conc stepname ".dat")))
(launch:load-logpro-dat run-id test-id stepname))
(if (steprun-good? logpro-used (launch:einf-exit-code exit-info) stepparms)
(if (not (null? tal))
(loop (car tal) (cdr tal) stepname))
(debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))
))))))))
(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
(let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
(start-seconds (current-seconds))
(calc-minutes (lambda ()
(inexact->exact
(round
|
︙ | | | ︙ | |
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
|
(debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
(launch:handle-zombie-tests run-id)
(when do-sync
;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
)
(if kill-job?
(begin
(debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
(mutex-lock! m)
;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
|
|
|
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
|
(debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
(launch:handle-zombie-tests run-id)
(when do-sync
;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
;;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
)
(if kill-job?
(begin
(debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
(mutex-lock! m)
;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
|
︙ | | | ︙ | |
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
|
(tal (cdr not-completed-tests)))
(let* ((test-name (vector-ref running-test 2))
(item-path (vector-ref running-test 11)))
(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
(if (not (null? tal))
(loop (car tal) (cdr tal)))))))))))
(define (launch:is-test-alive host pid)
(if (and host pid (not (equal? host "n/a")))
(let* ((cmd (conc "ssh " host " pstree -A " pid))
(output (with-input-from-pipe cmd read-lines)))
(debug:print 2 *default-log-port* "Running " cmd " received " output)
(if (eq? (length output) 0)
#f
#t))
#t))
(define (launch:kill-tests-if-dead run-id)
(let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
(let loop ((running-test (car running-tests))
(tal (cdr running-tests))
(kill-cnt 0))
(let* ((test-name (vector-ref running-test 2))
(item-path (vector-ref running-test 11))
|
<
<
<
<
<
<
<
<
<
<
|
784
785
786
787
788
789
790
791
792
793
794
795
796
797
|
(tal (cdr not-completed-tests)))
(let* ((test-name (vector-ref running-test 2))
(item-path (vector-ref running-test 11)))
(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
(if (not (null? tal))
(loop (car tal) (cdr tal)))))))))))
(define (launch:kill-tests-if-dead run-id)
(let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
(let loop ((running-test (car running-tests))
(tal (cdr running-tests))
(kill-cnt 0))
(let* ((test-name (vector-ref running-test 2))
(item-path (vector-ref running-test 11))
|
︙ | | | ︙ | |
924
925
926
927
928
929
930
931
932
933
934
935
936
937
|
mtcachef rccachef
use-cache
(get-environment-variable "MT_RUN_AREA_HOME")
(common:file-exists? mtcachef)
(common:file-exists? rccachef))
;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
(set! *configdat* (configf:read-alist mtcachef))
;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
(set! *runconfigdat* (configf:read-alist rccachef))
(set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME")))
(set! *configstatus* 'fulldata)
(set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME"))
*toppath*)
;; there are no existing cached configs, do full reads of the configs and cache them
|
>
|
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
|
mtcachef rccachef
use-cache
(get-environment-variable "MT_RUN_AREA_HOME")
(common:file-exists? mtcachef)
(common:file-exists? rccachef))
;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
(set! *configdat* (configf:read-alist mtcachef))
(set! *db-keys* (common:get-fields *configdat*))
;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
(set! *runconfigdat* (configf:read-alist rccachef))
(set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME")))
(set! *configstatus* 'fulldata)
(set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME"))
*toppath*)
;; there are no existing cached configs, do full reads of the configs and cache them
|
︙ | | | ︙ | |
960
961
962
963
964
965
966
967
968
969
970
971
972
973
|
(begin
;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
(set! *configdat* (car first-pass))
;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
(set! *configinfo* first-pass)
(set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
(set! toppath *toppath*)
(if (not *toppath*)
(begin
(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
(exit 1)))
(setenv "MT_RUN_AREA_HOME" *toppath*)
;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
(let* ((keys (common:list-or-null (rmt:get-keys)
|
>
|
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
|
(begin
;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
(set! *configdat* (car first-pass))
;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
(set! *configinfo* first-pass)
(set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
(set! toppath *toppath*)
(set! *db-keys* (common:get-fields *configdat*))
(if (not *toppath*)
(begin
(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
(exit 1)))
(setenv "MT_RUN_AREA_HOME" *toppath*)
;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
(let* ((keys (common:list-or-null (rmt:get-keys)
|
︙ | | | ︙ | |
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
|
(if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
(let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
(rdat (read-config (conc toppath ;; convert this to use runconfig:read!
"/runconfigs.config") *runconfigdat* #t sections: sections)))
(set! *configinfo* cfgdat)
(set! *configdat* (car cfgdat))
(set! *runconfigdat* rdat)
(set! *toppath* toppath)
(set! *configstatus* 'partial))
(begin
(debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
(exit 2))))))
;; COND ends here.
|
>
|
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
|
(if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
(let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
(rdat (read-config (conc toppath ;; convert this to use runconfig:read!
"/runconfigs.config") *runconfigdat* #t sections: sections)))
(set! *configinfo* cfgdat)
(set! *configdat* (car cfgdat))
(set! *db-keys* (common:get-fields *configdat*))
(set! *runconfigdat* rdat)
(set! *toppath* toppath)
(set! *configstatus* 'partial))
(begin
(debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
(exit 2))))))
;; COND ends here.
|
︙ | | | ︙ | |