Overview
Context
Changes
Modified db.scm
from [95bd13385e]
to [b980dbcc63].
︙ | | |
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
|
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
|
-
+
|
(debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
(apply open-run-close-exception-handling proc idb params))
(apply open-run-close-no-exception-handling proc idb params)))
;; (define open-run-close
(define open-run-close open-run-close-exception-handling)
;; open-run-close-no-exception-handling
open-run-close-exception-handling)
;; open-run-close-exception-handling)
;;)
(define (db:initialize-main-db db)
(let* ((configdat (car *configinfo*)) ;; tut tut, global warning...
(keys (keys:config-get-fields configdat))
(havekeys (> (length keys) 0))
(keystr (keys->keystr keys))
|
︙ | | |
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
|
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
|
-
-
-
-
-
-
-
+
+
+
+
+
+
+
|
(db:general-call db 'top-test-set-running (list test-name))
(db:general-call db 'top-test-set-per-pf-counts (list test-name test-name test-name)))
#f)
#f))
(define (db:tests-register-test dbstruct run-id test-name item-path)
(sqlite3:execute (db:get-db dbstruct run-id) 'register-test run-id test-name item-path))
(let ((sleep-time (random 20))
(err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
(case err-status
((busy)(thread-sleep! 4))
(else
(debug:print 0 "WARNING: possible problem with call to cdb:remote-run, database may be read-only and locked, waiting and trying again ...")
(thread-sleep! sleep-time)))
;; (let ((sleep-time (random 20))
;; (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
;; (case err-status
;; ((busy)(thread-sleep! 4))
;; (else
;; (debug:print 0 "WARNING: possible problem with call to cdb:remote-run, database may be read-only and locked, waiting and trying again ...")
;; (thread-sleep! sleep-time)))
(define (db:test-get-logfile-info dbstruct run-id test-name)
(let ((res #f))
(sqlite3:for-each-row
(lambda (path final_logf)
;; (let ((path (sdb:qry 'getstr path-id))
;; (final_logf (sdb:qry 'getstr final_logf-id)))
|
︙ | | |
Modified launch.scm
from [59d0d6772c]
to [dd204b865e].
︙ | | |
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
+
-
+
+
|
(set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
;; environment overrides are done *before* the remaining critical envars.
(alist->env-vars env-ovrd)
(set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
(set-item-env-vars itemdat)
(save-environment-as-files "megatest")
;; open-run-close not needed for test-set-meta-info
;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
(tests:set-full-meta-info test-id run-id 0 work-area)
;; (tests:set-full-meta-info test-id run-id 0 work-area)
(tests:set-full-meta-info #f test-id run-id 0 work-area 10)
;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(thread-sleep! 0.3) ;; NFS slowness has caused grief here
(if (args:get-arg "-xterm")
(set! fullrunscript "xterm")
|
︙ | | |
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
|
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
|
+
-
+
+
+
+
|
(calc-minutes (lambda ()
(inexact->exact
(round
(-
(current-seconds)
start-seconds)))))
(kill-tries 0))
;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
(tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
(tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
(let loop ((minutes (calc-minutes)))
(begin
(set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
(and runtlim (let* ((run-seconds (- (current-seconds) start-seconds))
(time-exceeded (> run-seconds runtlim)))
(if time-exceeded
(begin
(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
#t)
#f)))))
;; open-run-close not needed for test-set-meta-info
;; (tests:set-partial-meta-info #f test-id run-id minutes work-area)
(tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)
;; (tests:set-partial-meta-info #f test-id run-id minutes work-area 10)
;; (tests:set-partial-meta-info test-id run-id minutes work-area)
(if kill-job?
(begin
(mutex-lock! m)
;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
;; section and the runit section? Or add a loop that tries three times with a 1/4 second
;; between tries?
|
︙ | | |
Modified megatest.scm
from [a6d2c3a647]
to [f5cb1d64b3].
︙ | | |
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
|
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
|
-
+
+
+
+
|
(exit 6))
(let* ((stepname (args:get-arg "-runstep"))
(logprofile (args:get-arg "-logpro"))
(logfile (conc stepname ".log"))
(cmd (if (null? remargs) #f (car remargs)))
(params (if cmd (cdr remargs) '()))
(exitstat #f)
(shell (last (string-split (get-environment-variable "SHELL") "/")))
(shell (let ((sh (get-environment-variable "SHELL") ))
(if sh
(last (string-split sh "/"))
"bash")))
(redir (case (string->symbol shell)
((tcsh csh ksh) ">&")
((zsh bash sh ash) "2>&1 >")
(else ">&")))
(fullcmd (conc "(" (string-intersperse
(cons cmd params) " ")
") " redir " " logfile)))
|
︙ | | |
Modified tests.scm
from [6b6e58b833]
to [c93debf593].
︙ | | |
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
|
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
|
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
(rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)
(if minutes
(rmt:general-call 'update-run-duration run-id minutes test-id))
(if (and uname hostname)
(rmt:general-call 'update-uname-host run-id uname hostname test-id)))
;; This one is for running with no db access (i.e. via rmt: internally)
(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
(define (tests:set-full-meta-info test-id run-id minutes work-area)
(let* ((num-records 0)
(cpuload (get-cpu-load))
(diskfree (get-df (current-directory)))
(uname (get-uname "-srvpio"))
(hostname (get-host-name)))
(tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes)
(tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)))
(define (tests:set-partial-meta-info test-id run-id minutes work-area)
;; (define (tests:set-full-meta-info test-id run-id minutes work-area)
;; (let ((remtries 10))
(handle-exceptions
exn
(if (> remtries 0)
(begin
(set! remtries (- remtries 1))
(thread-sleep! 10)
(tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
(let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
(debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
(debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
(print "exn=" (condition->list exn))
(debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn))
(print-call-chain)))
(let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb))
(cpuload (get-cpu-load))
(diskfree (get-df (current-directory)))
(uname (get-uname "-srvpio"))
(hostname (get-host-name)))
(tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
(tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))))
;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
(let* ((cpuload (get-cpu-load))
(diskfree (get-df (current-directory))))
(tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes)))
(diskfree (get-df (current-directory)))
(remtries 10))
(handle-exceptions
exn
(if (> remtries 0)
(begin
(set! remtries (- remtries 1))
(thread-sleep! 10)
(tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
(let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
(debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
(debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
(print "exn=" (condition->list exn))
(debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn))
(print-call-chain)))
(tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
)))
;;======================================================================
;; A R C H I V I N G
;;======================================================================
(define (test:archive db test-id)
#f)
|
︙ | | |