Megatest: Diff

Differences From Artifact [89f36cc1cb]:

File runs.scm — part of check-in [0c7e3bc287] at 2022-05-13 12:16:07 on branch v1.65 — Fixed few things with the hasty implementation of global waitons. (user: mrwellan, size: 164093) [annotate] [blame] [check-ins using]

To Artifact [d6ca8ef849]:

File runs.scm — part of check-in [0c24209eae] at 2024-03-20 17:26:45 on branch v1.70 — Removed the thread doing rmt:find-and-mark-incomplete, changed an ERROR to INFO message (user: mmgraham, size: 164373) [annotate] [blame] [check-ins using]

︙			︙
98 99 100 101 102 103 104 ~~105 106~~ 107 108 109 ~~110~~ 111 112 113 114 115 116 117	(if (runs:lownoise "runners-softlock-wait" 360) (debug:print-info 0 default-log-port "Other runners in flight, giving up some time...")) (thread-sleep! 2)) (begin (if (runs:lownoise "runners-softlock-nowait" 360) (debug:print-info 0 default-log-port "No runners in flight, updating softlock")) (let* ((ouf (open-output-file my-lock-file))) ~~(with-output-to-port ouf ~~(lambda ()(print (current-seconds))))~~~~ (close-output-port ouf)))) (runs:dat-last-fuel-check-set! rdat (current-seconds)))))) ~~;; Fourth try, do accounting through time~~ ;; (define (runs:parallel-runners-mgmt rdat) (let ((time-to-check (configf:lookup-number configdat "runners" "time-to-check" default: 10)) ;; 28 (time-to-wait (configf:lookup-number configdat "runners" "time-to-wait" default: 30)) (now-time (current-seconds))) (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check (runs:wait-on-softlock rdat "runners"))))	\| < \|	98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116	(if (runs:lownoise "runners-softlock-wait" 360) (debug:print-info 0 default-log-port "Other runners in flight, giving up some time...")) (thread-sleep! 2)) (begin (if (runs:lownoise "runners-softlock-nowait" 360) (debug:print-info 0 default-log-port "No runners in flight, updating softlock")) (let* ((ouf (open-output-file my-lock-file))) (with-output-to-port ouf (lambda ()(print (current-seconds)))) (close-output-port ouf)))) (runs:dat-last-fuel-check-set! rdat (current-seconds)))))) ;; Fourth try, do accounting through time.... ;; (define (runs:parallel-runners-mgmt rdat) (let ((time-to-check (configf:lookup-number configdat "runners" "time-to-check" default: 10)) ;; 28 (time-to-wait (configf:lookup-number configdat "runners" "time-to-wait" default: 30)) (now-time (current-seconds))) (if (> (- now-time (runs:dat-last-fuel-check rdat)) time-to-check) ;; time to check (runs:wait-on-softlock rdat "runners"))))
︙			︙
226 227 228 229 230 231 232 ~~233 234 235~~ 236 237 238 239 240 241 242	(launch:setup force-reread: #t) (fatal-loop (+ count 1))) (begin (debug:print 0 default-log-port "FATAL: configdat was inaccessible! This should never happen. Retried " count " times. Message: " msg) (debug:print 0 default-log-port "Call chain:") (with-output-to-port default-log-port ~~(lambda () (print "configdat is >>"configdat"<<")~~ (pp configdat) (pp call-chain))) (exit 1)))) ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") (when (or (not configdat) (not (hash-table? configdat))) (debug:print 0 default-log-port "WARNING: configdat was inaccessible! This should never happen. Brute force reread.")	< \| \|	225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240	(launch:setup force-reread: #t) (fatal-loop (+ count 1))) (begin (debug:print 0 default-log-port "FATAL: configdat was inaccessible! This should never happen. Retried " count " times. Message: " msg) (debug:print 0 default-log-port "Call chain:") (with-output-to-port default-log-port (lambda () (print "configdat is >>"configdat"<<") (pp configdat) (pp call-chain))) (exit 1)))) ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") (when (or (not configdat) (not (hash-table? configdat))) (debug:print 0 default-log-port "WARNING: configdat was inaccessible! This should never happen. Brute force reread.")
︙			︙
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 ~~328~~ 329 330 331 332 333 334 335	(currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! runs:denoise key currtime) #t) #f))) (define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) ~~(job-group-limit (let ((jobg-count (configf:lookup configdat "jobgroups" jobgroup)))~~ (if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (runs:inc-can-run-more-tests-count runsdat)) ;; (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running)) (begin	> > > > > > > > > > > > > > > \|	308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348	(currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! runs:denoise key currtime) #t) #f))) (define last-test-launch 0) (define too-soon-delays (make-hash-table)) ;; to-soon delay, when matching event happened in less than dseconds delay wseconds ;; (define (runs:too-soon-delay key dseconds wseconds) (let* ((last-time (hash-table-ref/default too-soon-delays key #f))) (if (and last-time (< (- (current-seconds) last-time) dseconds)) (begin (if (runs:lownoise (conc "too-soon-delay"key) 60) (debug:print-info 2 default-log-port "Polling throttle for "key)) (thread-sleep! wseconds))) (hash-table-set! too-soon-delays key (current-seconds)))) (define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) ;; Take advantage of a good place to exit if running the one-pass methodology (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) (args:get-arg "-one-pass")) (exit 0)) (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (configf:lookup-number configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (runs:inc-can-run-more-tests-count runsdat)) ;; (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running)) (begin
︙			︙
506 507 508 509 510 511 512 ~~513 514~~ 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 ~~531~~ 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 ~~549~~ 550 551 552 ~~553~~ 554 555 556 557 558 559 560	;; run-count is passed from megatest.scm as configf:lookup configdat "setup" "reruns", or defaults to 1. (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) ;; (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc toppath "/runconfigs.config")) ~~(dbfile (conc toppath "/megatest.db")) (readonly-mode (not (file-write-access? dbfile)))~~ (test-records (make-hash-table)) ;; need to process runconfigs before generating these lists (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) ;; (tdbdat (tasks:open-db)) (config-reruns (let ((x (configf:lookup configdat "setup" "reruns"))) (if x (string->number x) #f))) (allowed-tests #f) (runconf #f)) ;; check if readonly (when readonly-mode ~~(debug:print-error 0 default-log-port "megatest~~.db~~ is readonly. Cannot proceed.")~~ (exit 1)) ;; per user request. If less than 100Meg space on dbdir partition, bail out with error ;; this will reduce issues in database corruption (common:check-db-dir-and-exit-if-insufficient) ;; override the number of reruns from the configs ;; this needs to be done at the place where is first runs:run-tests called ;(if (and config-reruns ; (> run-count config-reruns)) ;(set! run-count config-reruns)) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (set! time-to-exit #t) ~~(~~print~~ "Received signal " signum ", cleaning up before exit. Please wait...")~~ (let ((th1 (make-thread (lambda () ;; (let ((tdbdat (tasks:open-db))) (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) ~~(~~print~~ "Killed by signal " signum ". Exiting")~~ (thread-sleep! 3) (exit)))) (th2 (make-thread (lambda () (thread-sleep! 5) (debug:print 0 default-log-port "Done") (exit 4))))) (thread-start! th2)	\| \| \| \| \|	519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573	;; run-count is passed from megatest.scm as configf:lookup configdat "setup" "reruns", or defaults to 1. (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) ;; (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc toppath "/runconfigs.config")) (mtconfig (conc toppath "/megatest.config")) (readonly-mode (not (file-write-access? mtconfig))) (test-records (make-hash-table)) ;; need to process runconfigs before generating these lists (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) ;; (tdbdat (tasks:open-db)) (config-reruns (let ((x (configf:lookup configdat "setup" "reruns"))) (if x (string->number x) #f))) (allowed-tests #f) (runconf #f)) ;; check if readonly (when readonly-mode (debug:print-error 0 default-log-port "Megatest database is readonly. Cannot proceed.") (exit 1)) ;; per user request. If less than 100Meg space on dbdir partition, bail out with error ;; this will reduce issues in database corruption (common:check-db-dir-and-exit-if-insufficient) ;; override the number of reruns from the configs ;; this needs to be done at the place where is first runs:run-tests called ;(if (and config-reruns ; (> run-count config-reruns)) ;(set! run-count config-reruns)) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (set! time-to-exit #t) (debug:print 0 default-log-port "Received signal " signum ", cleaning up before exit. Please wait...") (let ((th1 (make-thread (lambda () ;; (let ((tdbdat (tasks:open-db))) (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) (debug:print 0 default-log-port "Killed by signal " signum ". Exiting") (thread-sleep! 3) (exit)))) (th2 (make-thread (lambda () (thread-sleep! 5) (debug:print 0 default-log-port "Done") (exit 4))))) (thread-start! th2)
︙			︙
784 785 786 787 788 789 790 ~~791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815~~ 816 817 818 ~~819 820~~ 821 822 823 824 825 826 827 828 ~~829 830~~ 831 832 833 834 835 836 837	(if (not (null? required-tests)) (debug:print-info 1 default-log-port "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 default-log-port "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* (~~(keep-going #t~~) ~~(run-queue-retries 5)~~ ~~#;(th1 (make-thread (lambda ()~~ ~~(handle-exceptions~~ ~~exn~~ ~~(begin~~ ~~(print-call-chain)~~ ~~(print " message: " ((condition-property-accessor 'exn 'message) exn)))~~ ~~(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests~~ ~~(any->number reglen) all-tests-registry)))~~ ~~"runs:run-tests-queue"))~~ ~~(th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here?~~ ~~;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...~~ ~~(let ((run-ids (rmt:get-all-run-ids)))~~ ~~(for-each (lambda (run-id)~~ ~~(if keep-going~~ ~~(handle-exceptions~~ ~~exn~~ ~~(debug:print 0 default-log-port "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)~~ (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) ~~run-ids)))~~ ~~"runs: mark-incompletes")))~~ ~~;; (thread-start! th1)~~ ~~(thread-start! th2)~~ ~~;; (thread-join! th1)~~ ;; just do the main stuff in the main thread (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry) ~~(set! keep-going #f)~~ ~~(thread-join! th2)~~ ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self ~~(runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) (launch:end-of-run-check run-id)))~~ (debug:print-info 0 default-log-port "No tests to run"))) (debug:print-info 4 default-log-port "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 default-log-port " run-count " run-count) ; (runs:run-post-hook run-id)) ; (debug:print-info 2 default-log-port "Not calling post hook runcount = " run-count ))	\| < < < < < < < < < < < < < < < < < < < < < < < < < < \| \|	797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824	(if (not (null? required-tests)) (debug:print-info 1 default-log-port "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 default-log-port "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup configdat "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* () ;; just do the main stuff in the main thread (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry) ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) (launch:end-of-run-check run-id))) (debug:print-info 0 default-log-port "No tests to run"))) (debug:print-info 4 default-log-port "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 default-log-port " run-count " run-count) ; (runs:run-post-hook run-id)) ; (debug:print-info 2 default-log-port "Not calling post hook runcount = " run-count ))
︙			︙
1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298	(not (member 'exclusive testmode))))) ;; (hash-table-delete! max-tries-hash (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! max-tries-hash (make-hash-table)) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry runsdat testdat) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) ;; hed should be dropped at this time #f))	>	1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286	(not (member 'exclusive testmode))))) ;; (hash-table-delete! max-tries-hash (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! max-tries-hash (make-hash-table)) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry runsdat testdat) (set! last-test-launch (current-seconds)) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! global-delta) (if (or (not (null? tal))(not (null? reg))) (runs:loop-values tal reg reglen regfull reruns) ;; hed should be dropped at this time #f))
︙			︙
1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513	;; every time though the loop increment the test/itempatt val. ;; when the min is > max-allowed and none running then force exit ;; (define max-tries-hash (make-hash-table)) (define (runs:pretty-long-list lst) (if (> (length lst) 8)(append (take lst 3)(list "...")) lst)) ;;====================================================================== ;; runs:run-tests-queue is called by runs:run-tests ;;====================================================================== ;; ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)	> >	1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503	;; every time though the loop increment the test/itempatt val. ;; when the min is > max-allowed and none running then force exit ;; (define max-tries-hash (make-hash-table)) (define (runs:pretty-long-list lst) (if (> (length lst) 8)(append (take lst 3)(list "...")) lst)) (define last-loop-time-ms 0) ;;====================================================================== ;; runs:run-tests-queue is called by runs:run-tests ;;====================================================================== ;; ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
︙			︙
1638 1639 1640 1641 1642 1643 1644 1645 ~~1646~~ 1647 1648 1649 1650 1651 1652 1653	jobgroup: jobgroup waitons: waitons testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash))	> > > > > > > > > > > > \| > > > > >	1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660	jobgroup: jobgroup waitons: waitons testmode: testmode newtal: newtal itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) ;; too-tight loop detection and delay, this might hide issues ;; that occur in long run times. Consider commenting when debugging ;; (if (and (>= num-running max-concurrent-jobs) (< (- (current-milliseconds) last-loop-time-ms) 500)) (begin (if (runs:lownoise "too-tight-loop" 5) (debug:print-info 2 default-log-port "Excessively fast loop, delaying 1/2 second")) (thread-sleep! 0.5))) (set! last-loop-time-ms (current-milliseconds)) (runs:dat-regfull-set! runsdat regfull) (if (> (- (current-seconds) last-test-launch) 5) ;; be pretty aggressive for five seconds after (runs:too-soon-delay (conc "loop delay " hed) 1 0.6) ;; starting a test then apply more delay (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup configdat "setup" "give-up-waiting") 36000))) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 default-log-port "max-tries-hash: " (hash-table->alist max-tries-hash))
︙			︙
1888 1889 1890 1891 1892 1893 1894 ~~1895~~ 1896 1897 1898 1899 1900 1901 1902	))) ;; end loop on sorted test names ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched (rmt:set-var (conc "lunch-complete-" run-id) "yes") ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) ~~(thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle~~ (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") (equal? (configf:lookup configdat "setup" "run-wait") "yes")) (> num-running 0))	\|	1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909	))) ;; end loop on sorted test names ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched (rmt:set-var (conc "lunch-complete-" run-id) "yes") ;; now if -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) ;; (debug:print-info 13 default-log-port "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") (equal? (configf:lookup configdat "setup" "run-wait") "yes")) (> num-running 0))
︙			︙
2177 2178 2179 2180 2181 2182 2183 ~~2184~~ 2185 2186 2187 2188 2189 2190 2191	(begin ;; wait for less than max jobs here (if (runs:dat-wait-for-jobs-function runsdat) ((runs:dat-wait-for-jobs-function runsdat) testdat-rec)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin ~~(~~print~~ "ERROR: Failed to launch the test. Exiting as soon as possible")~~ (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)) ) ;; wait again here? )))))) ((KILLED) (debug:print 1 default-log-port "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")	\|	2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198	(begin ;; wait for less than max jobs here (if (runs:dat-wait-for-jobs-function runsdat) ((runs:dat-wait-for-jobs-function runsdat) testdat-rec)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (debug:print 0 default-log-port "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)) ) ;; wait again here? )))))) ((KILLED) (debug:print 1 default-log-port "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
︙			︙
2345 2346 2347 2348 2349 2350 2351 ~~2352~~ 2353 2354 2355 ~~2356~~ 2357 2358 2359 2360 2361 2362 2363	(state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) (rp-mutex (make-mutex)) (bup-mutex (make-mutex)) (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) ~~(dbfile (conc toppath "/megatest.db"))~~ (readonly-mode (not (file-write-access? dbfile)))) (when (and readonly-mode (member action write-access-actions)) ~~(debug:print-error 0 default-log-port ~~"megatest.~~db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .")~~ (exit 1))) (debug:print-info 4 default-log-port "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) (if (> 2 (length state-status)) (begin (debug:print-error 0 default-log-port "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL") (exit)))	\| \|	2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370	(state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) (rp-mutex (make-mutex)) (bup-mutex (make-mutex)) (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) (dbfile (conc toppath "/.megatest/main.db")) (readonly-mode (not (file-write-access? dbfile)))) (when (and readonly-mode (member action write-access-actions)) (debug:print-error 0 default-log-port dbfile " is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") (exit 1))) (debug:print-info 4 default-log-port "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) (if (> 2 (length state-status)) (begin (debug:print-error 0 default-log-port "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL") (exit)))
︙			︙
2700 2701 2702 2703 2704 2705 2706 ~~2707~~ 2708 ~~2709~~ 2710 2711 2712 2713 2714 2715 2716	)) runs) ;; special case - archive get (if (equal? (args:get-arg "-archive") "get") (archive:bup-get-data "get" #f #f test-records rp-mutex bup-mutex)) (if (or (equal? (args:get-arg "-archive") "save") (equal? (args:get-arg "-archive") "save-remove")) (begin ~~(~~print~~ "db archive started")~~ (archive:megatest-db target runnamepatt) ~~(~~print~~ "db archived")))~~ ) #t ) (define (runs:remove-test-directory test mode) ;; remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir)	\| \|	2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723	)) runs) ;; special case - archive get (if (equal? (args:get-arg "-archive") "get") (archive:bup-get-data "get" #f #f test-records rp-mutex bup-mutex)) (if (or (equal? (args:get-arg "-archive") "save") (equal? (args:get-arg "-archive") "save-remove")) (begin (debug:print 0 default-log-port "db archive started") (archive:megatest-db target runnamepatt) (debug:print 0 default-log-port "db archived"))) ) #t ) (define (runs:remove-test-directory test mode) ;; remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir)
︙			︙
2871 2872 2873 2874 2875 2876 2877 ~~2878~~ 2879 2880 2881 2882 2883 2884 2885	(lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (configf:lookup test-conf "test_meta" fld))) ;; (debug:print 5 default-log-port "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin ~~(~~print~~ "Updating " test-name " " fld " to " val)~~ (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." ;; (define (runs:get-tests-matching-tags tagpatt) (let* ((tagdata (rmt:get-tests-tags))	\|	2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892	(lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (configf:lookup test-conf "test_meta" fld))) ;; (debug:print 5 default-log-port "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (debug:print 0 default-log-port "Updating " test-name " " fld " to " val) (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." ;; (define (runs:get-tests-matching-tags tagpatt) (let* ((tagdata (rmt:get-tests-tags))
︙			︙