1
2
3
4
5
6
7
8
9
|
;; Copyright 2006-2012, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
|
|
|
1
2
3
4
5
6
7
8
9
|
;; Copyright 2006-2013, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
|
︙ | | | ︙ | |
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
|
(test-id (assoc/default 'test-id cmdinfo))
(target (assoc/default 'target cmdinfo))
(itemdat (assoc/default 'itemdat cmdinfo))
(env-ovrd (assoc/default 'env-ovrd cmdinfo))
(set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar
(runname (assoc/default 'runname cmdinfo))
(megatest (assoc/default 'megatest cmdinfo))
(mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
(keys #f)
(keyvals #f)
(fullrunscript (if (not runscript)
#f
(if (substring-index "/" runscript)
runscript ;; use unadultered if contains slashes
(let ((fulln (conc testpath "/" runscript)))
(if (and (file-exists? fulln)
(file-execute-access? fulln))
fulln
runscript))))) ;; assume it is on the path
(rollup-status 0))
(change-directory top-path)
(debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
;; Setup the *runremote* global var
(if *runremote* (debug:print 2 "ERROR: I'm not expecting *runremote* to be set at this time"))
;; (set! *runremote* runremote)
(set! *transport-type* (string->symbol transport))
(set! keys (cdb:remote-run db:get-keys #f))
(set! keyvals (if run-id (cdb:remote-run db:get-key-vals #f run-id) #f))
;; apply pre-overrides before other variables. The pre-override vars must not
;; clobbers things from the official sources such as megatest.config and runconfigs.config
(if (string? set-vars)
(let ((varpairs (string-split set-vars ",")))
(debug:print 4 "varpairs: " varpairs)
(map (lambda (varpair)
(let ((varval (string-split varpair "=")))
(if (eq? (length varval) 2)
(let ((var (car varval))
(val (cadr varval)))
(debug:print 1 "Adding pre-var/val " var " = " val " to the environment")
(setenv var val)))))
varpairs)))
(setenv "MT_TEST_RUN_DIR" work-area)
(setenv "MT_TEST_NAME" test-name)
(setenv "MT_ITEM_INFO" (conc itemdat))
(setenv "MT_RUNNAME" runname)
(setenv "MT_MEGATEST" megatest)
(setenv "MT_TARGET" target)
(if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
;; (change-directory top-path)
(if (not (setup-for-run))
(begin
(debug:print 0 "Failed to setup, exiting")
;; (sqlite3:finalize! db)
;; (sqlite3:finalize! tdb)
(exit 1)))
;; Can setup as client for server mode now
;; (client:setup)
(change-directory *toppath*)
(set-megatest-env-vars run-id) ;; these may be needed by the launching process
(change-directory work-area)
(set-run-config-vars run-id keys keyvals target) ;; (db:get-target db run-id))
;; environment overrides are done *before* the remaining critical envars.
(alist->env-vars env-ovrd)
(set-megatest-env-vars run-id)
(set-item-env-vars itemdat)
(save-environment-as-files "megatest")
;; open-run-close not needed for test-set-meta-info
(tests:set-meta-info #f test-id run-id test-name itemdat 0 work-area)
(tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
(if (args:get-arg "-xterm")
(set! fullrunscript "xterm")
(if (and fullrunscript (not (file-execute-access? fullrunscript)))
(system (conc "chmod ug+x " fullrunscript))))
;; We are about to actually kick off the test
;; so this is a good place to remove the records for
;; any previous runs
;; (db:test-remove-steps db run-id testname itemdat)
(let* ((m (make-mutex))
(kill-job? #f)
(exit-info (vector #t #t #t))
(job-thread #f)
(runit (lambda ()
;; (let-values
;; (((pid exit-status exit-code)
;; (run-n-wait fullrunscript)))
(tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
;; if there is a runscript do it first
(if fullrunscript
(let ((pid (process-run fullrunscript)))
(let loop ((i 0))
(let-values
(((pid-val exit-status exit-code) (process-wait pid #t)))
(mutex-lock! m)
|
>
|
|
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
>
|
|
|
|
>
|
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
|
(test-id (assoc/default 'test-id cmdinfo))
(target (assoc/default 'target cmdinfo))
(itemdat (assoc/default 'itemdat cmdinfo))
(env-ovrd (assoc/default 'env-ovrd cmdinfo))
(set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar
(runname (assoc/default 'runname cmdinfo))
(megatest (assoc/default 'megatest cmdinfo))
(runtlim (assoc/default 'runtlim cmdinfo))
(mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
(keys #f)
(keyvals #f)
(fullrunscript (if (not runscript)
#f
(if (substring-index "/" runscript)
runscript ;; use unadultered if contains slashes
(let ((fulln (conc testpath "/" runscript)))
(if (and (file-exists? fulln)
(file-execute-access? fulln))
fulln
runscript))))) ;; assume it is on the path
(rollup-status 0))
(change-directory top-path)
(debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
;; Setup the *runremote* global var
(if *runremote* (debug:print 2 "ERROR: I'm not expecting *runremote* to be set at this time"))
;; (set! *runremote* runremote)
;; (set! *transport-type* (string->symbol transport))
(set! keys (cdb:remote-run db:get-keys #f))
(set! keyvals (keys:target->keyval keys target))
;; apply pre-overrides before other variables. The pre-override vars must not
;; clobbers things from the official sources such as megatest.config and runconfigs.config
(if (string? set-vars)
(let ((varpairs (string-split set-vars ",")))
(debug:print 4 "varpairs: " varpairs)
(map (lambda (varpair)
(let ((varval (string-split varpair "=")))
(if (eq? (length varval) 2)
(let ((var (car varval))
(val (cadr varval)))
(debug:print 1 "Adding pre-var/val " var " = " val " to the environment")
(setenv var val)))))
varpairs)))
(for-each
(lambda (varval)
(let ((var (car varval))
(val (cadr varval)))
(if val
(setenv var val)
(begin
(debug:print 0 "ERROR: required variable " var " does not have a valid value. Exiting")
(exit)))))
(list
(list "MT_TEST_RUN_DIR" work-area)
(list "MT_TEST_NAME" test-name)
(list "MT_ITEM_INFO" (conc itemdat))
(list "MT_RUNNAME" runname)
(list "MT_MEGATEST" megatest)
(list "MT_TARGET" target)
(list "MT_LINKTREE" (configf:lookup *configdat* "setup" "linktree"))))
(if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
;; (change-directory top-path)
(if (not (setup-for-run))
(begin
(debug:print 0 "Failed to setup, exiting")
;; (sqlite3:finalize! db)
;; (sqlite3:finalize! tdb)
(exit 1)))
;; Can setup as client for server mode now
;; (client:setup)
(change-directory *toppath*)
(set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
(change-directory work-area)
(set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
;; environment overrides are done *before* the remaining critical envars.
(alist->env-vars env-ovrd)
(set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
(set-item-env-vars itemdat)
(save-environment-as-files "megatest")
;; open-run-close not needed for test-set-meta-info
(tests:set-full-meta-info #f test-id run-id 0 work-area)
;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
(tests:test-force-state-status! test-id "REMOTEHOSTSTART" "n/a")
(thread-sleep! 0.3) ;; NFS slowness has caused grief here
(if (args:get-arg "-xterm")
(set! fullrunscript "xterm")
(if (and fullrunscript (not (file-execute-access? fullrunscript)))
(system (conc "chmod ug+x " fullrunscript))))
;; We are about to actually kick off the test
;; so this is a good place to remove the records for
;; any previous runs
;; (db:test-remove-steps db run-id testname itemdat)
(let* ((m (make-mutex))
(kill-job? #f)
(exit-info (vector #t #t #t))
(job-thread #f)
(keep-going #t)
(runit (lambda ()
;; (let-values
;; (((pid exit-status exit-code)
;; (run-n-wait fullrunscript)))
;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
;; Since we should have a clean slate at this time there is no need to do
;; any of the other stuff that tests:test-set-status! does. Let's just
;; force RUNNING/n/a
(thread-sleep! 0.3)
(tests:test-force-state-status! test-id "RUNNING" "n/a")
(thread-sleep! 0.3) ;; NFS slowness has caused grief here
;; if there is a runscript do it first
(if fullrunscript
(let ((pid (process-run fullrunscript)))
(let loop ((i 0))
(let-values
(((pid-val exit-status exit-code) (process-wait pid #t)))
(mutex-lock! m)
|
︙ | | | ︙ | |
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
(loop (+ i 1)))
)))))
;; then, if runscript ran ok (or did not get called)
;; do all the ezsteps (if any)
(if ezsteps
(let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
(ezstepslst (hash-table-ref/default testconfig "ezsteps" '())))
(if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps"))
;; if ezsteps was defined then we are sure to have at least one step but check anyway
(if (not (> (length ezstepslst) 0))
(debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length")
(let loop ((ezstep (car ezstepslst))
(tal (cdr ezstepslst))
(prevstep #f))
|
>
|
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
(loop (+ i 1)))
)))))
;; then, if runscript ran ok (or did not get called)
;; do all the ezsteps (if any)
(if ezsteps
(let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
(ezstepslst (hash-table-ref/default testconfig "ezsteps" '())))
(hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
(if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps"))
;; if ezsteps was defined then we are sure to have at least one step but check anyway
(if (not (> (length ezstepslst) 0))
(debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length")
(let loop ((ezstep (car ezstepslst))
(tal (cdr ezstepslst))
(prevstep #f))
|
︙ | | | ︙ | |
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
|
((eq? rollup-status 2) 'warn)
((eq? rollup-status 0) 'pass)
(else 'fail)))
(next-status (cond
((eq? overall-status 'pass) this-step-status)
((eq? overall-status 'warn)
(if (eq? this-step-status 'fail) 'fail 'warn))
(else 'fail))))
(debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used
" this-step-status: " this-step-status " overall-status: " overall-status
" next-status: " next-status " rollup-status: " rollup-status)
(case next-status
((warn)
(set! rollup-status 2)
;; NB// test-set-status! does rdb calls under the hood
(tests:test-set-status! test-id "RUNNING" "WARN"
(if (eq? this-step-status 'warn) "Logpro warning found" #f)
#f))
((pass)
(tests:test-set-status! test-id "RUNNING" "PASS" #f #f))
(else ;; 'fail
(set! rollup-status 1) ;; force fail
(tests:test-set-status! test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
))))
(if (and (steprun-good? logpro-used (vector-ref exit-info 2))
(not (null? tal)))
(loop (car tal) (cdr tal) stepname)))
(debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
(monitorjob (lambda ()
(let* ((start-seconds (current-seconds))
(calc-minutes (lambda ()
(inexact->exact
(round
(-
(current-seconds)
start-seconds)))))
(kill-tries 0))
(let loop ((minutes (calc-minutes)))
(begin
(set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat))
;; open-run-close not needed for test-set-meta-info
(tests:set-meta-info #f test-id run-id test-name itemdat minutes work-area)
(if kill-job?
(begin
(mutex-lock! m)
(let* ((pid (vector-ref exit-info 0)))
(if (number? pid)
(begin
(debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
(let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
(for-each
(lambda (p)
(let* ((parts (string-split p))
(p-id (if (> (length parts) 0)
(string->number (car parts))
#f)))
(if p-id
(begin
(debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id)
(system (conc "kill -9 " p-id))))))
(car processes))
(system (conc "kill -9 -" pid))))
(begin
(debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
(tests:test-set-status! test-id "KILLED" "FAIL"
(args:get-arg "-m") #f)
(sqlite3:finalize! tdb)
(exit 1))))
(set! kill-tries (+ 1 kill-tries))
(mutex-unlock! m)))
;; (sqlite3:finalize! db)
(thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
(loop (calc-minutes)))))))
(th1 (make-thread monitorjob))
(th2 (make-thread runit)))
(set! job-thread th2)
(thread-start! th1)
(thread-start! th2)
(thread-join! th2)
(mutex-lock! m)
(let* ((item-path (item-list->path itemdat))
(testinfo (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path)))
;; Am I completed?
(if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
(begin
(debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
(tests:test-set-status! test-id
(if kill-job? "KILLED" "COMPLETED")
(cond
((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
((eq? rollup-status 0)
;; if the current status is AUTO the defer to the calculated value (i.e. leave this AUTO)
(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
((eq? rollup-status 1) "FAIL")
((eq? rollup-status 2)
;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
(else "FAIL"))
(args:get-arg "-m") #f)))
;; for automated creation of the rollup html file this is a good place...
(if (not (equal? item-path ""))
(tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
)
(mutex-unlock! m)
;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log"))))
;; (success exec-results)) ;; (eq? (cadr exec-results) 0)))
(debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area "
work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
;; (sqlite3:finalize! db)
;; (sqlite3:finalize! tdb)
(if (not (vector-ref exit-info 1))
(exit 4)))))))
;; set up the very basics needed for doing anything here.
(define (setup-for-run)
;; would set values for KEYS in the environment here for better support of env-override but
;; have chicken/egg scenario. need to read megatest.config then read it again. Going to
;; pass on that idea for now
;; special case
(set! *configinfo* (find-and-read-config
(if (args:get-arg "-config")(args:get-arg "-config") "megatest.config")
environ-patt: "env-override"
given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
pathenvvar: "MT_RUN_AREA_HOME"))
(set! *configdat* (if (car *configinfo*)(car *configinfo*) #f))
(set! *toppath* (if (car *configinfo*)(cadr *configinfo*) #f))
(if *toppath*
(setenv "MT_RUN_AREA_HOME" *toppath*) ;; to be deprecated
(debug:print 0 "ERROR: failed to find the top path to your run setup."))
*toppath*)
(define (get-best-disk confdat)
(let* ((disks (hash-table-ref/default confdat "disks" #f))
(best #f)
(bestsize 0))
(if disks
|
|
>
>
>
>
>
>
|
|
|
|
>
|
>
>
>
>
>
>
>
|
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
|
>
|
>
|
|
>
>
>
>
>
>
>
|
|
<
<
<
>
|
>
>
|
|
|
>
>
>
>
|
>
>
>
>
>
>
>
|
<
<
<
<
<
>
>
|
|
|
|
|
|
|
|
|
|
|
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
|
((eq? rollup-status 2) 'warn)
((eq? rollup-status 0) 'pass)
(else 'fail)))
(next-status (cond
((eq? overall-status 'pass) this-step-status)
((eq? overall-status 'warn)
(if (eq? this-step-status 'fail) 'fail 'warn))
(else 'fail)))
(next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
(cond
((null? tal) ;; more to run?
"COMPLETED")
(else "RUNNING")))
)
(debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used
" this-step-status: " this-step-status " overall-status: " overall-status
" next-status: " next-status " rollup-status: " rollup-status)
(case next-status
((warn)
(set! rollup-status 2)
;; NB// test-set-status! does rdb calls under the hood
(tests:test-set-status! test-id next-state "WARN"
(if (eq? this-step-status 'warn) "Logpro warning found" #f)
#f))
((pass)
(tests:test-set-status! test-id next-state "PASS" #f #f))
(else ;; 'fail
(set! rollup-status 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED"
(tests:test-set-status! test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
))))
(if (and (steprun-good? logpro-used (vector-ref exit-info 2))
(not (null? tal)))
(loop (car tal) (cdr tal) stepname)))
(debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
(monitorjob (lambda ()
(let* ((start-seconds (current-seconds))
(calc-minutes (lambda ()
(inexact->exact
(round
(-
(current-seconds)
start-seconds)))))
(kill-tries 0))
(tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
(let loop ((minutes (calc-minutes)))
(begin
(set! kill-job? (or (test-get-kill-request test-id) ;; run-id test-name itemdat))
(and runtlim (let* ((run-seconds (- (current-seconds) start-seconds))
(time-exceeded (> run-seconds runtlim)))
(if time-exceeded
(begin
(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
#t)
#f)))))
;; open-run-close not needed for test-set-meta-info
(tests:set-partial-meta-info #f test-id run-id minutes work-area)
(if kill-job?
(begin
(mutex-lock! m)
;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
;; section and the runit section? Or add a loop that tries three times with a 1/4 second
;; between tries?
(let* ((pid (vector-ref exit-info 0)))
(if (number? pid)
(process-signal pid signal/kill)
;; (begin
;; (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
;; (let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
;; (for-each
;; (lambda (p)
;; (let* ((parts (string-split p))
;; (p-id (if (> (length parts) 0)
;; (string->number (car parts))
;; #f)))
;; (if p-id
;; (begin
;; (debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id)
;; (system (conc "kill -9 " p-id))))))
;; (car processes))
;; (system (conc "kill -9 -" pid))))
(begin
(debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
(tests:test-set-status! test-id "KILLED" "FAIL"
(args:get-arg "-m") #f)
(sqlite3:finalize! tdb)
(exit 1) ;; IS THIS NECESSARY OR WISE???
)))
(set! kill-tries (+ 1 kill-tries))
(mutex-unlock! m)))
;; (sqlite3:finalize! db)
(if keep-going
(begin
(thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
(if keep-going
(loop (calc-minutes)))))))
(tests:update-central-meta-info test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional
(th1 (make-thread monitorjob "monitor job"))
(th2 (make-thread runit "run job")))
(set! job-thread th2)
(thread-start! th1)
(thread-start! th2)
(thread-join! th2)
(set! keep-going #f)
(thread-join! th1)
;; (thread-sleep! 1)
;; (thread-terminate! th1) ;; Not sure if this is a good idea
(thread-sleep! 1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
;; (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f)
(mutex-lock! m)
(let* ((item-path (item-list->path itemdat))
;; only state and status needed - use lazy routine
(testinfo (cdb:remote-run db:get-testinfo-state-status #f test-id))) ;;;(cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path)))
;; Am I completed?
(if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
(let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
;; "COMPLETED"
;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test
)
(new-status (cond
((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
((eq? rollup-status 0)
;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
((eq? rollup-status 1) "FAIL")
((eq? rollup-status 2)
;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
(else "FAIL")))) ;; (db:test-get-status testinfo)))
(debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " rollup-status)
(tests:test-set-status! test-id
new-state
new-status
(args:get-arg "-m") #f)
;; need to update the top test record if PASS or FAIL and this is a subtest
;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
;; (if (not (equal? item-path ""))
;; (begin
;; (thread-sleep! 0.1) ;; give other processes an opportunity to access the db as rollup is lower priority
;; (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))
))
;; for automated creation of the rollup html file this is a good place...
(if (not (equal? item-path ""))
(tests:summarize-items #f run-id test-id test-name #f))) ;; don't force - just update if no
(mutex-unlock! m)
(debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area "
work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
(if (not (vector-ref exit-info 1))
(exit 4)))))))
;; set up the very basics needed for doing anything here.
(define (setup-for-run)
;; would set values for KEYS in the environment here for better support of env-override but
;; have chicken/egg scenario. need to read megatest.config then read it again. Going to
;; pass on that idea for now
;; special case
(if (not (hash-table? *configdat*)) ;; no need to re-open on every call
(begin
(set! *configinfo* (find-and-read-config
(if (args:get-arg "-config")(args:get-arg "-config") "megatest.config")
environ-patt: "env-override"
given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
pathenvvar: "MT_RUN_AREA_HOME"))
(set! *configdat* (if (car *configinfo*)(car *configinfo*) #f))
(set! *toppath* (if (car *configinfo*)(cadr *configinfo*) #f))
(if *toppath*
(setenv "MT_RUN_AREA_HOME" *toppath*) ;; to be deprecated
(debug:print 0 "ERROR: failed to find the top path to your Megatest area."))))
*toppath*)
(define (get-best-disk confdat)
(let* ((disks (hash-table-ref/default confdat "disks" #f))
(best #f)
(bestsize 0))
(if disks
|
︙ | | | ︙ | |
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
|
;;
;; <linkdir> - <target> - <testname> [ - <itempath> ]
;;
;; All log file links should be stored relative to the top of link path
;;
;; <target> - <testname> [ - <itempath> ]
;;
(define (create-work-area run-id run-info key-vals test-id test-src-path disk-path testname itemdat)
(let* ((item-path (item-list->path itemdat))
(runname (db:get-value-by-header (db:get-row run-info)
(db:get-header run-info)
"runname"))
;; convert back to db: from rdb: - this is always run at server end
(target (string-intersperse key-vals "/"))
(not-iterated (equal? "" item-path))
;; all tests are found at <rundir>/test-base or <linkdir>/test-base
(testtop-base (conc target "/" runname "/" testname))
(test-base (conc testtop-base (if not-iterated "" "/") item-path))
|
|
|
|
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
|
;;
;; <linkdir> - <target> - <testname> [ - <itempath> ]
;;
;; All log file links should be stored relative to the top of link path
;;
;; <target> - <testname> [ - <itempath> ]
;;
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat)
(let* ((item-path (item-list->path itemdat))
(runname (db:get-value-by-header (db:get-row run-info)
(db:get-header run-info)
"runname"))
;; convert back to db: from rdb: - this is always run at server end
(target (string-intersperse (map cadr keyvals) "/"))
(not-iterated (equal? "" item-path))
;; all tests are found at <rundir>/test-base or <linkdir>/test-base
(testtop-base (conc target "/" runname "/" testname))
(test-base (conc testtop-base (if not-iterated "" "/") item-path))
|
︙ | | | ︙ | |
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
|
(debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path)
(if (not (file-exists? linktree))
(begin
(debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree)
(create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
;; create the directory for the tests dir links, this is needed no matter what...
(if (not (directory-exists? lnkbase))
(create-directory lnkbase #t))
;; update the toptest record with its location rundir, cache the path
;; This wass highly inefficient, one db write for every subtest, potentially
;; thousands of unnecessary updates, cache the fact it was set and don't set it
;; again.
|
|
>
|
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
|
(debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path)
(if (not (file-exists? linktree))
(begin
(debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree)
(create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
;; create the directory for the tests dir links, this is needed no matter what...
(if (and (not (directory-exists? lnkbase))
(not (file-exists? lnkbase)))
(create-directory lnkbase #t))
;; update the toptest record with its location rundir, cache the path
;; This wass highly inefficient, one db write for every subtest, potentially
;; thousands of unnecessary updates, cache the fact it was set and don't set it
;; again.
|
︙ | | | ︙ | |
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
|
" - creating link from: " test-path "\n"
" to: " lnktarget)
;; If there is already a symlink delete it and recreate it.
(handle-exceptions
exn
(begin
(debug:print 0 "ERROR: Failed to re-create link " linktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
(exit))
(if (symbolic-link? lnktarget) (delete-file lnktarget))
(if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
;; I suspect this section was deleting test directories under some
;; wierd sitations? This doesn't make sense - reenabling the rm -f
;; I honestly don't remember *why* this chunk was needed...
|
|
|
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
|
" - creating link from: " test-path "\n"
" to: " lnktarget)
;; If there is already a symlink delete it and recreate it.
(handle-exceptions
exn
(begin
(debug:print 0 "ERROR: Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
(exit))
(if (symbolic-link? lnktarget) (delete-file lnktarget))
(if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
;; I suspect this section was deleting test directories under some
;; wierd sitations? This doesn't make sense - reenabling the rm -f
;; I honestly don't remember *why* this chunk was needed...
|
︙ | | | ︙ | |
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
|
;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area
;; 4. remotely run the test on allocated host
;; - could be ssh to host from hosts table (update regularly with load)
;; - could be netbatch
;; (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat params)
(change-directory *toppath*)
(alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
(list ;; (list "MT_TEST_RUN_DIR" work-area)
(list "MT_RUN_AREA_HOME" *toppath*)
(list "MT_TEST_NAME" test-name)
;; (list "MT_ITEM_INFO" (conc itemdat))
(list "MT_RUNNAME" runname)
;; (list "MT_TARGET" mt_target)
))
(let* ((useshell (config-lookup *configdat* "jobtools" "useshell"))
(launcher (config-lookup *configdat* "jobtools" "launcher"))
(runscript (config-lookup test-conf "setup" "runscript"))
(ezsteps (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
(diskspace (config-lookup test-conf "requirements" "diskspace"))
(memory (config-lookup test-conf "requirements" "memory"))
(hosts (config-lookup *configdat* "jobtools" "workhosts"))
(remote-megatest (config-lookup *configdat* "setup" "executable"))
;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to
;; allow running from dashboard. Extract the path
;; from the called megatest and convert dashboard
;; or dboard to megatest
(local-megatest (let* ((lm (car (argv)))
(dir (pathname-directory lm))
(exe (pathname-strip-directory lm)))
|
|
|
>
>
>
>
>
|
|
|
|
|
|
>
>
|
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
|
;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area
;; 4. remotely run the test on allocated host
;; - could be ssh to host from hosts table (update regularly with load)
;; - could be netbatch
;; (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
(change-directory *toppath*)
(alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
(list ;; (list "MT_TEST_RUN_DIR" work-area)
(list "MT_RUN_AREA_HOME" *toppath*)
(list "MT_TEST_NAME" test-name)
;; (list "MT_ITEM_INFO" (conc itemdat))
(list "MT_RUNNAME" runname)
;; (list "MT_TARGET" mt_target)
))
(let* ((useshell (let ((ush (config-lookup *configdat* "jobtools" "useshell")))
(if ush
(if (equal? ush "no") ;; must use "no" to NOT use shell
#f
ush)
#t))) ;; default is yes
(launcher (config-lookup *configdat* "jobtools" "launcher"))
(runscript (config-lookup test-conf "setup" "runscript"))
(ezsteps (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
(diskspace (config-lookup test-conf "requirements" "diskspace"))
(memory (config-lookup test-conf "requirements" "memory"))
(hosts (config-lookup *configdat* "jobtools" "workhosts"))
(remote-megatest (config-lookup *configdat* "setup" "executable"))
(run-time-limit (or (configf:lookup test-conf "requirements" "runtimelim")
(configf:lookup *configdat* "setup" "runtimelim")))
;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to
;; allow running from dashboard. Extract the path
;; from the called megatest and convert dashboard
;; or dboard to megatest
(local-megatest (let* ((lm (car (argv)))
(dir (pathname-directory lm))
(exe (pathname-strip-directory lm)))
|
︙ | | | ︙ | |
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
|
(diskpath #f)
(cmdparms #f)
(fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x))))
(mt-bindir-path #f)
(item-path (item-list->path itemdat))
;; (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path))
(testinfo (cdb:get-test-info-by-id *runremote* test-id))
(mt_target (string-intersperse (map cadr keyvallst) "/"))
(debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '())
(if (args:get-arg "-logging")(list "-logging") '()))))
(if hosts (set! hosts (string-split hosts)))
;; set the megatest to be called on the remote host
(if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
(set! mt-bindir-path (pathname-directory remote-megatest))
(if launcher (set! launcher (string-split launcher)))
;; set up the run work area for this test
(set! diskpath (get-best-disk *configdat*))
(if diskpath
(let ((dat (create-work-area run-id run-info key-vals test-id test-path diskpath test-name itemdat)))
(set! work-area (car dat))
(set! toptest-work-area (cadr dat))
(debug:print-info 2 "Using work area " work-area))
(begin
(set! work-area (conc test-path "/tmp_run"))
(create-directory work-area #t)
(debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))
|
|
>
|
|
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
|
(diskpath #f)
(cmdparms #f)
(fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x))))
(mt-bindir-path #f)
(item-path (item-list->path itemdat))
;; (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path))
(testinfo (cdb:get-test-info-by-id *runremote* test-id))
(mt_target (string-intersperse (map cadr keyvals) "/"))
(debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '())
(if (args:get-arg "-logging")(list "-logging") '()))))
(setenv "MT_ITEMPATH" item-path)
(if hosts (set! hosts (string-split hosts)))
;; set the megatest to be called on the remote host
(if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
(set! mt-bindir-path (pathname-directory remote-megatest))
(if launcher (set! launcher (string-split launcher)))
;; set up the run work area for this test
(set! diskpath (get-best-disk *configdat*))
(if diskpath
(let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
(set! work-area (car dat))
(set! toptest-work-area (cadr dat))
(debug:print-info 2 "Using work area " work-area))
(begin
(set! work-area (conc test-path "/tmp_run"))
(create-directory work-area #t)
(debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))
|
︙ | | | ︙ | |
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
|
(list 'runscript runscript)
(list 'run-id run-id )
(list 'test-id test-id )
(list 'itemdat itemdat )
(list 'megatest remote-megatest)
(list 'ezsteps ezsteps)
(list 'target mt_target)
(list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '()))
(list 'set-vars (if params (hash-table-ref/default params "-setvars" #f)))
(list 'runname runname)
(list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
;; clean out step records from previous run if they exist
;; (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
;; (open-run-close db:delete-test-step-records db test-id)
(change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
(tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
(cond
((and launcher hosts) ;; must be using ssh hostname
|
>
|
|
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
|
(list 'runscript runscript)
(list 'run-id run-id )
(list 'test-id test-id )
(list 'itemdat itemdat )
(list 'megatest remote-megatest)
(list 'ezsteps ezsteps)
(list 'target mt_target)
(list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
(list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '()))
(list 'set-vars (if params (hash-table-ref/default params "-setvars" #f)))
(list 'runname runname)
(list 'mt-bindir-path mt-bindir-path)))))))
;; clean out step records from previous run if they exist
;; (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
;; (open-run-close db:delete-test-step-records db test-id)
(change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
(tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
(cond
((and launcher hosts) ;; must be using ssh hostname
|
︙ | | | ︙ | |
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
|
(hash-table-ref/default test-conf "pre-launch-env-overrides" '())))
(miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
(append (list (list "MT_TEST_RUN_DIR" work-area)
(list "MT_TEST_NAME" test-name)
(list "MT_ITEM_INFO" (conc itemdat))
(list "MT_RUNNAME" runname)
(list "MT_TARGET" mt_target)
)
itemdat)))
(launch-results (apply (if (equal? (configf:lookup *configdat* "setup" "launchwait") "yes")
cmd-run-with-stderr->list
process-run)
(if useshell
(string-intersperse fullcmd " ")
(car fullcmd))
(if useshell
'()
(cdr fullcmd)))))
(if (list? launch-results)
(with-output-to-file "mt_launch.log"
(lambda ()
(apply print launch-results))
#:append))
(debug:print 2 "Launching completed, updating db")
(debug:print 2 "Launch results: " launch-results)
(if (not launch-results)
(begin
(print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
;; (sqlite3:finalize! db)
;; good ole "exit" seems not to work
|
>
>
|
>
|
>
>
>
|
>
|
|
>
|
>
|
|
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
|
(hash-table-ref/default test-conf "pre-launch-env-overrides" '())))
(miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
(append (list (list "MT_TEST_RUN_DIR" work-area)
(list "MT_TEST_NAME" test-name)
(list "MT_ITEM_INFO" (conc itemdat))
(list "MT_RUNNAME" runname)
(list "MT_TARGET" mt_target)
(list "MT_ITEMPATH" item-path)
)
itemdat)))
;; Launchwait defaults to true, must override it to turn off wait
(launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
(launch-results (apply (if launchwait
cmd-run-with-stderr->list
process-run)
(if useshell
(let ((cmdstr (string-intersperse fullcmd " ")))
(if launchwait
cmdstr
(conc cmdstr " >> mt_launch.log 2>&1")))
(car fullcmd))
(if useshell
'()
(cdr fullcmd)))))
(if (not launchwait) ;; give the OS a little time to allow the process to start
(thread-sleep! 0.01))
(with-output-to-file "mt_launch.log"
(lambda ()
(if (list? launch-results)
(apply print launch-results)
(print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this"))
#:append))
(debug:print 2 "Launching completed, updating db")
(debug:print 2 "Launch results: " launch-results)
(if (not launch-results)
(begin
(print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
;; (sqlite3:finalize! db)
;; good ole "exit" seems not to work
|
︙ | | | ︙ | |