Overview
Context
Changes
Modified common.scm
from [95cd7a5e85]
to [7944e605d1].
︙ | | |
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
-
-
+
+
-
-
+
-
-
-
-
-
-
+
|
(declare (unit common))
(declare (uses commonmod))
(import commonmod)
(include "common_records.scm")
;; (require-library margs)
(define (remove-server-files directory-path)
(let ((files (glob (string-append directory-path "/server*"))))
;; (include "margs.scm")
(for-each delete-file* files)))
;; (define old-exit exit)
;;
;; (define (exit . code)
;; (if (null? code)
;; (old-exit)
;; (old-exit code)))
(define (stop-the-train)
(thread-start! (make-thread (lambda ()
(let loop ()
(if (and *toppath*
(file-exists? (conc *toppath*"/stop-the-train")))
(begin
(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
(remove-server-files (conc *toppath* "/logs"))
(exit 1)))
(thread-sleep! 5)
(loop))))))
;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
|
︙ | | |
Modified dbfile.scm
from [3caef9c8a2]
to [14d0a1b210].
︙ | | |
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
|
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
|
-
+
+
-
+
|
(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
(if sync-mode
(sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
(if journal-mode
(sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
(if (and init-proc (not db-exists))
(init-proc db))
db)))
db))
expire-time: 30)
(begin
(if (file-exists? fname )
(let ((db (sqlite3:open-database fname)))
;; pragmas synchronous not needed because this db is used read-only
;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";")
(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
db )
(print "file doesn't exist: " fname))))
(print "cautious-open-database: file doesn't exist: " fname))))
(exn (io-error)
(dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
(retry))
(exn (corrupt)
(dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
(retry))
(exn (busy)
|
︙ | | |
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
|
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
|
+
+
-
+
+
+
|
(thread-sleep! 0.25)
(if (file-exists? fname)
(handle-exceptions exn
#f
(with-input-from-file fname
(lambda ()
(equal? key-string (read-line)))))
(begin
(dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 0.25 seconds later")
#f)
#f
)
)
)
)
)
)
(define (dbfile:simple-file-lock-and-wait fname #!key (expire-time 300))
(let ((end-time (+ expire-time (current-seconds))))
|
︙ | | |
Modified launch.scm
from [60d380c61b]
to [6c7fb3bbfc].
︙ | | |
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
|
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
|
+
+
+
-
+
|
(list "MT_RUNNAME" runname)
(list "MT_MEGATEST" megatest)
(list "MT_TARGET" target)
(list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
(list "MT_TESTSUITENAME" (common:get-testsuite-name))))
;;(bb-check-path msg: "launch:execute post block 3")
(let ((tmppath (getenv "PATH")))
(if (string-search tmppath " ")
(debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported."))
(if mt-bindir-path (setenv "PATH" (conc "\""(getenv "PATH")":"mt-bindir-path"\"")))
(if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path))))
;;(bb-check-path msg: "launch:execute post block 4")
;; (change-directory top-path)
;; Can setup as client for server mode now
;; (client:setup)
;; environment overrides are done *before* the remaining critical envars.
|
︙ | | |
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
|
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
|
-
+
|
runname
(common:file-exists? fulldir))
(let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds)))
(targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash))
(rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
(if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
(begin
(debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
(debug:print-info 2 *default-log-port* "Caching megatest.config in " tmpfile)
(if (not (common:in-running-test?))
(configf:write-alist *configdat* tmpfile))
(system (conc "ln -sf " tmpfile " " targfile))))
)))
(debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))
|
︙ | | |
Modified megatest.scm
from [0a3a4d7312]
to [5e043f1c71].
︙ | | |
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
+
+
+
-
+
+
+
+
|
(dbfile:db-init-proc db:initialize-main-db)
;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
(if (common:file-exists? debugcontrolf)
(begin
;; for some reason, debug:print does not work here. Had to use print.
(print (conc "WARNING: loading " debugcontrolf))
(load debugcontrolf)))
(load debugcontrolf)
)
)
)
;; usage logging, careful with this, it is not designed to deal with all real world challenges!
;;
(if (and *usage-log-file*
(file-write-access? *usage-log-file*))
(with-output-to-file
*usage-log-file*
|
︙ | | |
Modified portlogger.scm
from [36a4964f50]
to [6fdf0de81e].
︙ | | |
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
-
+
+
|
(declare (unit portlogger))
(declare (uses db))
;; lsof -i
(define (portlogger:open-db fname)
(let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
(let* (;; (avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
(avail #t)
(exists (common:file-exists? fname))
(db (if avail
(sqlite3:open-database fname)
(begin
(system (conc "rm -f " fname))
(sqlite3:open-database fname))))
(handler (sqlite3:make-busy-timeout 136000))
|
︙ | | |
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
-
-
+
+
|
port INTEGER PRIMARY KEY,
state TEXT DEFAULT 'not-used',
fail_count INTEGER DEFAULT 0,
update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
db))
(define (portlogger:open-run-close proc . params)
(let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db"))
(avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
(let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")))
;; (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
(handle-exceptions
exn
(begin
;; (release-dot-lock fname)
(debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 5 *default-log-port* "exn=" (condition->list exn))
|
︙ | | |
Modified rmt.scm
from [df8aa04bdb]
to [f953bf98e7].
︙ | | |
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
|
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
|
-
+
|
(debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
(mutex-unlock! *rmt-mutex*)
(if success ;; success only tells us that the transport was
;; successful, have to examine the data to see if
;; there was a detected issue at the other end
(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
(begin
(debug:print-error 0 *default-log-port* " dat=" dat)
(debug:print-info 0 *default-log-port* "Bad return data from Megatest server: dat=" dat)
(extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
)))
(define (rmt:print-db-stats)
(let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
(debug:print 18 *default-log-port* "DB Stats\n========")
(debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
|
︙ | | |
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
|
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
|
+
|
(all-dat (cdr run-dat))
(tests-data (alist-ref "data" all-dat equal?))
(run-meta (alist-ref "meta" all-dat equal?))
(run-id (string->number (alist-ref "id" run-meta equal?))))
(rmt:insert-run run-id target runname run-meta)
(if (list? tests-data)
(begin
(debug:print 0 *default-log-port* "Inserting " (length tests-data) " tests in run " runname)
(for-each
(lambda (test-dat)
(let* ((test-id (car test-dat))
(test-rec (cdr test-dat)))
(rmt:insert-test run-id test-rec)))
tests-data)
)
|
︙ | | |
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
|
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
|
-
|
(item-path (alist-ref "item_path" test-rec equal?))
(test-id (rmt:get-test-id run-id testname item-path))
)
(if test-id
(debug:print 0 *default-log-port* "test "testname"/"item-path " already exists in run-id " run-id)
(begin
(debug:print 0 *default-log-port* " Insert test in run "run-id": "testname"/"item-path)
(rmt:send-receive 'insert-test run-id test-rec)
)
)
)
)
|
Modified runs.scm
from [9381a36070]
to [9b84cba39d].
︙ | | |
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
|
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
|
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
|
(if (not (null? required-tests))
(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
;; NOTE: these are all parent tests, items are not expanded yet.
(debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
(let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
(if (> (length (hash-table-keys test-records)) 0)
(let* ((keep-going #t)
(let* ()
(run-queue-retries 5)
;; (th1 (make-thread (lambda ()
;; (handle-exceptions
;; exn
;; (begin
;; (print-call-chain)
;; (print " message: " ((condition-property-accessor 'exn 'message) exn)))
;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
;; (any->number reglen) all-tests-registry)))
;; "runs:run-tests-queue"))
#;(th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here?
;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
(let ((run-ids (rmt:get-all-run-ids)))
(for-each (lambda (run-id)
(if keep-going
(handle-exceptions
exn
(debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
(rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
run-ids)))
"runs: mark-incompletes")))
;; (thread-start! th1)
;; (thread-start! th2)
;; (thread-join! th1)
;; just do the main stuff in the main thread
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
(any->number reglen) all-tests-registry)
(set! keep-going #f)
#;(thread-join! th2)
;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
(if (> run-count 0) ;; handle reruns
(begin
(if (not (hash-table-ref/default flags "-preclean" #f))
(hash-table-set! flags "-preclean" #t))
(if (not (hash-table-ref/default flags "-rerun" #f))
(hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
;; recursive call to self
(runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
(launch:end-of-run-check run-id)))
(runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
(launch:end-of-run-check run-id)))
(debug:print-info 0 *default-log-port* "No tests to run")))
(debug:print-info 4 *default-log-port* "All done by here")
;; TODO: try putting post hook call here
; (debug:print-info 2 *default-log-port* " run-count " run-count)
; (runs:run-post-hook run-id))
; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))
|
︙ | | |
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
|
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
|
-
+
|
(lambda (key)
(let* ((idx (cadr key))
(fld (car key))
(val (configf:lookup test-conf "test_meta" fld)))
;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
(if (and val (not (equal? (vector-ref currrecord idx) val)))
(begin
(debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val)
(debug:print 2 *default-log-port* "Updating " test-name " " fld " to " val)
(rmt:testmeta-update-field test-name fld val)))))
'(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))
;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
;;
(define (runs:get-tests-matching-tags tagpatt)
(let* ((tagdata (rmt:get-tests-tags))
|
︙ | | |
Modified tests.scm
from [5c2006972a]
to [fd3f543001].
︙ | | |
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
|
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
|
-
+
|
(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data
(if tcfg (hash-table-set! *testconfigs* test-full-name tcfg))
(if (and testexists
cache-file
(file-write-access? cache-path)
allow-write-cache)
(let ((tpath (conc cache-path "/.testconfig")))
(debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
(debug:print-info 2 *default-log-port* "Caching testconfig for " test-name " in " tpath)
(if (and tcfg (not (common:in-running-test?)))
(configf:write-alist tcfg tpath))))
tcfg))))))
;; sort tests by priority and waiton
;; Move test specific stuff to a test unit FIXME one of these days
(define (tests:sort-by-priority-and-waiton test-records)
|
︙ | | |