︙ | | | ︙ | |
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
chicken.time.posix
chicken.random
chicken.process.signal
(prefix base64 base64:)
csv-xml
directory-utils
matchable
regex
s11n
srfi-1
srfi-13
srfi-18
srfi-69
|
>
|
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
chicken.time.posix
chicken.random
chicken.process.signal
(prefix base64 base64:)
csv-xml
directory-utils
format
matchable
regex
s11n
srfi-1
srfi-13
srfi-18
srfi-69
|
︙ | | | ︙ | |
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
|
#f ;; sort-by
#f ;; sort-order
#f ;; get full data (not 'shortlist)
0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
'dashboard)
'()))
(log-dir (conc *toppath* "/logs"))
(log-file (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
(full-log-fname (conc log-dir "/" log-file)))
(if run-pre-hook
(if (null? existing-tests)
(let* ((use-log-dir (if (not (directory-exists? log-dir))
(handle-exceptions
exn
(begin
|
|
|
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
|
#f ;; sort-by
#f ;; sort-order
#f ;; get full data (not 'shortlist)
0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
'dashboard)
'()))
(log-dir (conc *toppath* "/logs"))
(log-file (conc "pre-hook-" (string-translate (get-environment-variable "MT_TARGET") "/" "-") "-" (get-environment-variable "MT_RUNNAME") ".log"))
(full-log-fname (conc log-dir "/" log-file)))
(if run-pre-hook
(if (null? existing-tests)
(let* ((use-log-dir (if (not (directory-exists? log-dir))
(handle-exceptions
exn
(begin
|
︙ | | | ︙ | |
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
|
;;
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
(let* ((keys (keys:config-get-fields *configdat*))
(keyvals (keys:target->keyval keys target))
(run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name)))
;; (deferred '()) ;; delay running these since they have a waiton clause
(runconfigf (conc *toppath* "/runconfigs.config"))
(dbfile (conc *toppath* "/megatest.db"))
(readonly-mode (not (file-writable? dbfile)))
(test-records (make-hash-table))
;; need to process runconfigs before generating these lists
(all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
(all-test-names #f) ;; (hash-table-keys all-tests-registry))
(test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
(required-tests #f) ;; Put fully qualified test/testpath names in this list to be done
|
|
|
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
|
;;
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
(let* ((keys (keys:config-get-fields *configdat*))
(keyvals (keys:target->keyval keys target))
(run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name)))
;; (deferred '()) ;; delay running these since they have a waiton clause
(runconfigf (conc *toppath* "/runconfigs.config"))
(dbfile (conc *toppath* "/.db/main.db"))
(readonly-mode (not (file-writable? dbfile)))
(test-records (make-hash-table))
;; need to process runconfigs before generating these lists
(all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
(all-test-names #f) ;; (hash-table-keys all-tests-registry))
(test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
(required-tests #f) ;; Put fully qualified test/testpath names in this list to be done
|
︙ | | | ︙ | |
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
|
;; Ensure all tests are registered in the test_meta table
(runs:update-all-test_meta #f)
;; run the run prehook if there are no tests yet run for this run:
;;
(runs:run-pre-hook run-id)
;; mark all test launched flag as false in the meta table
(rmt:set-var (conc "lunch-complete-" run-id) "no")
(debug:print-info 1 *default-log-port* "Setting end-of-run to no")
(let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns")))
(if x (string->number x) #f)))
(config-rerun-cnt (if config-reruns
config-reruns
1)))
(if (eq? config-rerun-cnt run-count)
(rmt:set-var (conc "end-of-run-" run-id) "no")))
(rmt:set-run-state-status run-id "new" "n/a")
;; now add non-directly referenced dependencies (i.e. waiton)
;;======================================================================
;; refactoring this block into tests:get-full-data
;;
;; What happended, this code is now duplicated in tests!?
;;
;;======================================================================
(if (not (null? test-names)) ;; BEGIN test-names loop
(let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names
(tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc
(debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names)
(change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
(setenv "MT_TEST_NAME" hed) ;;
(let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry))
;; NOTE: Have the config - can extract [waitons] section
((hed-mode)
(let ((m (configf:lookup config "requirements" "mode")))
(if m (map string->symbol (string-split m)) '(normal))))
|
|
|
|
|
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
|
;; Ensure all tests are registered in the test_meta table
(runs:update-all-test_meta #f)
;; run the run prehook if there are no tests yet run for this run:
;;
(runs:run-pre-hook run-id)
;; mark all test launched flag as false in the meta table
(rmt:set-var run-id (conc "lunch-complete-" run-id) "no")
(debug:print-info 1 *default-log-port* "Setting end-of-run to no")
(let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns")))
(if x (string->number x) #f)))
(config-rerun-cnt (if config-reruns
config-reruns
1)))
(if (eq? config-rerun-cnt run-count)
(rmt:set-var run-id (conc "end-of-run-" run-id) "no")))
(rmt:set-run-state-status run-id "new" "n/a")
;; now add non-directly referenced dependencies (i.e. waiton)
;;======================================================================
;; refactoring this block into tests:get-full-data
;;
;; What happended, this code is now duplicated in tests!?
;;
;;======================================================================
(if (not (null? test-names)) ;; BEGIN test-names loop
(let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names
(tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc
(debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names)
(change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
(set-environment-variable! "MT_TEST_NAME" hed) ;;
(let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry))
;; NOTE: Have the config - can extract [waitons] section
((hed-mode)
(let ((m (configf:lookup config "requirements" "mode")))
(if m (map string->symbol (string-split m)) '(normal))))
|
︙ | | | ︙ | |
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
|
exn
(debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
(runs:find-and-mark-incomplete-and-check-end-of-run run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
run-ids)))
"runs: mark-incompletes")))
;; (thread-start! th1)
(thread-start! th2)
;; (thread-join! th1)
;; just do the main stuff in the main thread
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
(any->number reglen) all-tests-registry)
(set! keep-going #f)
(thread-join! th2)
;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
(if (> run-count 0) ;; handle reruns
(begin
(if (not (hash-table-ref/default flags "-preclean" #f))
(hash-table-set! flags "-preclean" #t))
(if (not (hash-table-ref/default flags "-rerun" #f))
(hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
|
>
|
|
|
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
|
exn
(debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
(runs:find-and-mark-incomplete-and-check-end-of-run run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
run-ids)))
"runs: mark-incompletes")))
;; (thread-start! th1)
(thread-start! th2)
(thread-join! th2) ;; turn off marking incompletes in parallel. see if it is related to the db locks we are seeing.
;; just do the main stuff in the main thread
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
(any->number reglen) all-tests-registry)
(set! keep-going #f)
;; (thread-join! th2)
;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
(if (> run-count 0) ;; handle reruns
(begin
(if (not (hash-table-ref/default flags "-preclean" #f))
(hash-table-set! flags "-preclean" #t))
(if (not (hash-table-ref/default flags "-rerun" #f))
(hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
|
︙ | | | ︙ | |
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
|
;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current)
((or (null? prereqs-not-met)
(and (member 'toplevel testmode)
(null? non-completed)))
(debug:print-info 4 *default-log-port* "cond branch - " "ei-2")
(debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
(let ((test-name (tests:testqueue-get-testname test-record)))
(setenv "MT_TEST_NAME" test-name) ;;
(setenv "MT_RUNNAME" runname)
(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
(let ((items-list (items:get-items-from-config tconfig)))
(if (list? items-list)
(begin
(if (null? items-list)
(let ((test-id (rmt:get-test-id run-id test-name ""))
(num-items (rmt:test-toplevel-num-items run-id test-name)))
|
|
|
|
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
|
;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current)
((or (null? prereqs-not-met)
(and (member 'toplevel testmode)
(null? non-completed)))
(debug:print-info 4 *default-log-port* "cond branch - " "ei-2")
(debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
(let ((test-name (tests:testqueue-get-testname test-record)))
(set-environment-variable! "MT_TEST_NAME" test-name) ;;
(set-environment-variable! "MT_RUNNAME" runname)
(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
(let ((items-list (items:get-items-from-config tconfig)))
(if (list? items-list)
(begin
(if (null? items-list)
(let ((test-id (rmt:get-test-id run-id test-name ""))
(num-items (rmt:test-toplevel-num-items run-id test-name)))
|
︙ | | | ︙ | |
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
|
(if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED")))
(not (and prevdat
(equal? state (db:test-get-state prevdat))
(equal? status (db:test-get-status prevdat)))))
(let ((fmt (runs:gendat-inc-results-fmt runs-data))
(dtime (seconds->year-work-week/day-time event-time)))
(if (runs:lownoise "inc-print" 600)
(format #t fmt "State" "Status" "Start Time" "Duration" "Test path"))
;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime)
;; (debug:print 0 #f "event-time: " event-time " duration: " duration)
(format #t fmt
state
status
dtime
(seconds->hr-min-sec duration)
|
>
>
|
|
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
|
(if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED")))
(not (and prevdat
(equal? state (db:test-get-state prevdat))
(equal? status (db:test-get-status prevdat)))))
(let ((fmt (runs:gendat-inc-results-fmt runs-data))
(dtime (seconds->year-work-week/day-time event-time)))
(if (runs:lownoise "inc-print" 600)
(begin
(print "fmt=" fmt)
(format #t fmt "State" "Status" "Start Time" "Duration" "Test path")))
;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime)
;; (debug:print 0 #f "event-time: " event-time " duration: " duration)
(format #t fmt
state
status
dtime
(seconds->hr-min-sec duration)
|
︙ | | | ︙ | |
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
|
(debug:print-info 0 *default-log-port* "Have leftovers!")
(loop (car reg)(cdr reg) '() reruns))
(else
(debug:print-info 4 *default-log-port* "cond branch - " "rtq-9")
(debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns))
))) ;; end loop on sorted test names
;; this is the point where everything is launched and now you can mark the run in metadata table as all launched
(rmt:set-var (conc "lunch-complete-" run-id) "yes")
;; now *if* -run-wait we wait for all tests to be done
;; Now wait for any RUNNING tests to complete (if in run-wait mode)
;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
(thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
(let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))
|
|
|
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
|
(debug:print-info 0 *default-log-port* "Have leftovers!")
(loop (car reg)(cdr reg) '() reruns))
(else
(debug:print-info 4 *default-log-port* "cond branch - " "rtq-9")
(debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns))
))) ;; end loop on sorted test names
;; this is the point where everything is launched and now you can mark the run in metadata table as all launched
(rmt:set-var run-id (conc "lunch-complete-" run-id) "yes")
;; now *if* -run-wait we wait for all tests to be done
;; Now wait for any RUNNING tests to complete (if in run-wait mode)
;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
(thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
(let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))
|
︙ | | | ︙ | |
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
|
(debug:print-info 4 *default-log-port*
"\nTESTNAME: " full-test-name
"\n test-config: " (hash-table->alist test-conf)
"\n itemdat: " itemdat
)
(debug:print 2 *default-log-port* "Attempting to launch test " full-test-name)
;; (setenv "MT_TEST_NAME" test-name) ;;
;; (setenv "MT_ITEMPATH" item-path)
;; (setenv "MT_RUNNAME" runname)
(change-directory *toppath*)
;; Here is where the test_meta table is best updated
;; Yes, another use of a global for caching. Need a better way?
;;
;; There is now a single call to runs:update-all-test_meta and this
;; per-test call is not needed. Given the delicacy of the move to
|
|
|
|
|
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
|
(debug:print-info 4 *default-log-port*
"\nTESTNAME: " full-test-name
"\n test-config: " (hash-table->alist test-conf)
"\n itemdat: " itemdat
)
(debug:print 2 *default-log-port* "Attempting to launch test " full-test-name)
;; (set-environment-variable! "MT_TEST_NAME" test-name) ;;
;; (set-environment-variable! "MT_ITEMPATH" item-path)
;; (set-environment-variable! "MT_RUNNAME" runname)
(change-directory *toppath*)
;; Here is where the test_meta table is best updated
;; Yes, another use of a global for caching. Need a better way?
;;
;; There is now a single call to runs:update-all-test_meta and this
;; per-test call is not needed. Given the delicacy of the move to
|
︙ | | | ︙ | |
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
|
(state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
(rp-mutex (make-mutex))
(bup-mutex (make-mutex))
(keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
(test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop
(let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
(dbfile (conc *toppath* "/megatest.db"))
(readonly-mode (not (file-writable? dbfile))))
(when (and readonly-mode
(member action write-access-actions))
(debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .")
(exit 1)))
(debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
(if (> 2 (length state-status))
(begin
(debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
(exit)))
|
|
|
|
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
|
(state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
(rp-mutex (make-mutex))
(bup-mutex (make-mutex))
(keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
(test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop
(let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
(dbfile (conc *toppath* "/.db/main.db"))
(readonly-mode (not (file-writable? dbfile))))
(when (and readonly-mode
(member action write-access-actions))
(debug:print-error 0 *default-log-port* ".db/main.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .")
(exit 1)))
(debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
(if (> 2 (length state-status))
(begin
(debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
(exit)))
|
︙ | | | ︙ | |
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
|
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
#t)
(process-signal pid signal/int)
(thread-sleep! 5)
(if (process:alive? pid)
(process-signal pid signal/kill)))))
;; (call-with-environment-variables
(let ((old-targethost (getenv "TARGETHOST")))
(setenv "TARGETHOST" hostname)
(setenv "TARGETHOST_LOGF" "server-kills.log")
(system (conc "nbfake kill " pid))
(if old-targethost (setenv "TARGETHOST" old-targethost))
(unsetenv "TARGETHOST")
(unsetenv "TARGETHOST_LOGF"))))
(debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
records)))
(define (task:get-run-times)
(let* (
(run-patt (if (args:get-arg "-run-patt")
(args:get-arg "-run-patt")
|
|
|
|
|
|
|
|
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
|
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
#t)
(process-signal pid signal/int)
(thread-sleep! 5)
(if (process:alive? pid)
(process-signal pid signal/kill)))))
;; (call-with-environment-variables
(let ((old-targethost (get-environment-variable "TARGETHOST")))
(set-environment-variable! "TARGETHOST" hostname)
(set-environment-variable! "TARGETHOST_LOGF" "server-kills.log")
(system (conc "nbfake kill " pid))
(if old-targethost (set-environment-variable! "TARGETHOST" old-targethost))
(unset-environment-variable! "TARGETHOST")
(unset-environment-variable! "TARGETHOST_LOGF"))))
(debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
records)))
(define (task:get-run-times)
(let* (
(run-patt (if (args:get-arg "-run-patt")
(args:get-arg "-run-patt")
|
︙ | | | ︙ | |