Megatest: Diff

Differences From Artifact [934fc39609]:

File runs.scm — part of check-in [f2d7568565] at 2014-03-29 10:31:30 on branch v1.55 — Consolidated -target calls to one cleaner call, added HOME and USER to default ignored vars in env2file. Keep ignored vars in file but comment them out. (user: matt, size: 74966) [annotate] [blame] [check-ins using]

To Artifact [c22e67eca1]:

File runs.scm — part of check-in [70e753f237] at 2014-03-30 01:07:57 on branch confict-merge-v1.60 — Merge of v1.55 into v1.60 but too many conflicts, saving on branch confict-merge-v1.60 (user: matt, size: 74288) [annotate] [blame] [check-ins using] [more...]

︙
17 18 19 20 21 22 23 24 25 26 27 28 29 30	17 18 19 20 21 22 23 24 25 26 27 28 29 30 31	+	(declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) ;; (declare (uses filedb)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm")
︙
54 55 56 57 58 59 60 ~~61 62 63 64 65~~ 66 67 68 69 70 71 72	55 56 57 58 59 60 61 62 63 64 65 66 67 68	- - - - -	(toppath toppath) (envdat keyvals) ;; initial values start with keyvals (runconfig #f) (serverdat (if (args:get-arg "-server") runremote #f)) ;; to be used later (transport (or (args:get-arg "-transport") 'http)) ~~(db (if (and mconfig~~ ~~(or (args:get-arg "-server")~~ ~~(eq? transport 'fs)))~~ ~~(open-db)~~ ~~#f))~~ (run-id #f)) ;; Set all the environment vars we know so far, start with keys (for-each (lambda (keyval) (setenv (car keyval)(cadr keyval))) keyvals) ;; Set up various and sundry known vars here (setenv "MT_RUN_AREA_HOME" toppath)
︙
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 ~~100~~ 101 102 103 104 105 106 107 108 109 ~~110~~ 111 112 113 114 115 116 ~~117 118 119~~ ~~120~~ 121 122 123 ~~124~~ 125 126 127 128 129 130 131 132 133 134 135 136 137 138 ~~139~~ 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 ~~162~~ 163 164 165 ~~166 167~~ 168 169 170 171 172 173 174	81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171	- + - + - + - - - + - + + - + + - + + - + - - + +	(debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf) (if db (sqlite3:finalize! db)) (exit 1))) ;; Now have runconfigs data loaded, set environment vars (for-each (lambda (section) (for-each (lambda (varval) (set! envdat (append envdat (list varval))) ~~(setenv (car varval)(cadr varval)))~~ (safe-setenv (car varval)(cadr varval))) (configf:get-section runconfig section))) (list "default" target)) (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id))) (define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) (let* ((target (or (common:args-get-target) (get-environment-variable "MT_TARGET"))) ~~(keys (if inkeys inkeys (~~cdb:remote-run db~~:get-keys #f)))~~ (keys (if inkeys inkeys (rmt:get-keys))) (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) (vals (hash-table-ref/default env-vars-by-run-id run-id #f))) ;; get the info from the db and put it in the cache (if (not vals) (let ((ht (make-hash-table))) (hash-table-set! env-vars-by-run-id run-id ht) (set! vals ht) (for-each (lambda (key) ~~(hash-table-set! vals (car key) (cadr key))) ~~;; (cdb:remote-run db:get-run-key-val #f run-id (car key))))~~~~ (hash-table-set! vals (car key) (cadr key))) keyvals))) ;; from the cached data set the vars (hash-table-for-each vals (lambda (key val) (debug:print 2 "setenv " key " " val) ~~~~(if (and (string? key)~~ ~~(string? val))~~ (setenv key val)~~ (safe-setenv key val))) ~~(debug:print 0 "ERROR: Malformed environment variable definition: var=" var ", val=" val))))~~ (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) (alist->env-vars (hash-table-ref/default configdat "env-override" '())) ;; Lets use this as an opportunity to put MT_RUNNAME in the environment (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) (if runname ~~(setenv "MT_RUNNAME" ~~(if in~~runname ~~inrunname (cdb:remote-run db:get-run-name-from-id #f run-id))~~)~~ (setenv "MT_RUNNAME" runname) (debug:print 0 "ERROR: no value for runname for id " run-id))) (setenv "MT_RUN_AREA_HOME" toppath))) (define (set-item-env-vars itemdat) (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) ;; Every time can-run-more-tests is called increment the delay ;; ;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine ;; (define last-num-running-tests 0) (define runs:can-run-more-tests-count 0) ~~(define (runs:shrink-can-run-more-tests-count) ~~;; the db is a dummy var so we can use cdb:remote-run~~~~ (define (runs:shrink-can-run-more-tests-count) (set! runs:can-run-more-tests-count 0)) ;; (/ runs:can-run-more-tests-count 2))) ;; Temporary globals. Move these into the logic or into common ;; (define seen-cant-run-tests (make-hash-table)) ;; use to track tests that we suspect cannot be run (define (runs:inc-cant-run-tests testname) (hash-table-set! seen-cant-run-tests testname (+ (hash-table-ref/default seen-cant-run-tests testname 0) 1))) (define (runs:can-keep-running? testname n) (< (hash-table-ref/default seen-cant-run-tests testname 0) n)) (define runs:denoise (make-hash-table)) ;; key => last-time-ran (define (runs:lownoise key waitval) (let ((lasttime (hash-table-ref/default runs:denoise key 0)) (currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! runs:denoise key currtime) #t) #f))) ~~(define (runs:can-run-more-tests jobgroup max-concurrent-jobs)~~ (define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) (thread-sleep! (cond ((> runs:can-run-more-tests-count 20) 2);; obviously haven't had any work to do for a while (else 0))) ~~(let* ((num-running (~~cdb:remote-run db~~:get-count-tests-running #f)) (num-running-in-jobgroup (~~cdb:remote-run db~~:get-count-tests-running-in-jobgroup #f jobgroup))~~ (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count)))) (if (> (+ num-running num-running-in-jobgroup) 0) (set! runs:can-run-more-tests-count (+ runs:can-run-more-tests-count 1))) (if (not (eq? last-num-running-tests num-running))
︙
197 198 199 200 201 202 203 ~~204~~ 205 206 ~~207~~ 208 209 210 211 212 213 214 215 ~~216 217 218 219~~ 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 ~~239 240~~ 241 242 243 244 245 246 247	194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239	- - + - - - - - - + +	(list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags) ;; test-names ~~(common:clear-caches) ;; clear all caches~~ (let* ((keys (keys:config-get-fields configdat)) (keyvals (keys:target->keyval keys target)) ~~(run-id (~~cdb:remote-run db~~:register-run #f keyvals runname "new" "n/a" user)) ;; test-name)))~~ (run-id (rmt:register-run keyvals runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc toppath "/runconfigs.config")) (test-records (make-hash-table)) (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names (hash-table-keys all-tests-registry)) (test-names (tests:filter-test-names all-test-names test-patts)) (required-tests test-names)) ~~;; Update the synchronous setting in the db based on the default or what is set by the user~~ ~~;; This is done once here on a call to run tests rather than on every call to open-db~~ ~~(cdb:remote-run db:set-sync #f)~~ (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id already-seen-runconfig-info keyvals "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts))) (debug:print-info 0 "test names " test-names) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? passnum 0) (begin ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends ;; on test A but test B reached the point on being registered as NOT_STARTED and test ;; A failed for some reason then on re-run using -keepgoing the run can never complete. ~~(~~cdb:~~delete-tests-in-state ~~runremote~~ run-id "NOT_STARTED") (~~cdb:remote-run db~~:set-tests-state-status #f run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))~~ (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") (rmt:set-tests-state-status run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))) ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data
︙
381 382 383 384 385 386 387 ~~388~~ 389 390 391 392 393 394 395	373 374 375 376 377 378 379 380 381 382 383 384 385 386 387	- +	'() reg))) (define runs:nothing-left-in-queue-count 0) (define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records) (let* ((loop-list (list hed tal reg reruns)) ~~(prereqs-not-met (~~mt:lazy-~~get-prereqs-not-met run-id waitons item-path ~~mode:~~ testmode))~~ (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 4 "START OF INNER COND #2 " "\n can-run-more: " can-run-more "\n testname: " hed "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) "\n non-completed: " (runs:pretty-string non-completed)
︙
470 471 472 473 474 475 476 ~~477~~ 478 479 480 481 482 483 484	462 463 464 465 466 467 468 469 470 471 472 473 474 475 476	- +	(if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue") ~~(let ((test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id hed "")))~~ (let ((test-id (rmt:get-test-id run-id hed ""))) (mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull) (runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
︙
588 589 590 591 592 593 594 ~~595~~ 596 597 598 599 600 ~~601~~ 602 603 604 605 606 607 608	580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600	- + - +	((string? t) t) (else (conc t)))) inlst)) (define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry) ~~(let* ((run-limits-info (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running~~ (let* ((run-limits-info (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) ~~(prereqs-not-met (~~mt:lazy-~~get-prereqs-not-met run-id waitons item-path ~~mode:~~ testmode))~~ (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (loop-list (list hed tal reg reruns))) (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t)
︙
634 635 636 637 638 639 640 ~~641~~ 642 643 644 645 646 647 648 ~~649 650~~ 651 652 653 654 655 656 657	626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649	- + - - + +	;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) (if (eq? transport-type 'fs) ;; no point in parallel registration if use fs (begin ~~(c~~db:tests-~~register-test runremote run-id test-name item-path)~~ (rmt:general-call 'register-test run-id run-id test-name item-path) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)) (let ((th (make-thread (lambda () (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) (mutex-unlock! registry-mutex) ;; If haven't done it before register a top level test if this is an itemized test (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) ~~(c~~db:tests-~~register-test runremote run-id test-name "")) (c~~db:tests-~~register-test runremote run-id test-name item-path)~~ (rmt:general-call 'register-test run-id run-id test-name "")) (rmt:general-call 'register-test run-id run-id test-name item-path) (mutex-lock! registry-mutex) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (mutex-unlock! registry-mutex)) (conc test-name "/" item-path)))) (thread-start! th))) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg))
︙
759 760 761 762 763 764 765 ~~766~~ 767 768 769 770 771 772 773	751 752 753 754 755 756 757 758 759 760 761 762 763 764 765	- +	;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) ;; Do mark-and-find clean up of db before starting runing of quue ;; ;; (cdb:remote-run db:find-and-mark-incomplete #f) ~~(let ((run-info (~~cdb:remote-run db~~:get-run-info #f run-id))~~ (let ((run-info (rmt:get-run-info run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup configdat "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup configdat "setup" "max_concurrent_jobs")))
︙
813 814 815 816 817 818 819 ~~820~~ 821 822 823 824 825 826 827 828 829 830 831 832 ~~833~~ 834 835 836 837 838 839 840	805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832	- + - +	(priority (tests:testqueue-get-priority test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f (items (tests:testqueue-get-items test-record)) (item-path (item-list->path itemdat)) (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) ~~(num-running (~~cdb:remote-run db~~:get-count-tests-running-for-run-id #f run-id)))~~ (num-running (rmt:get-count-tests-running-for-run-id run-id))) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running 60)) (hash-table-set! max-tries-hash tfullname (+ (hash-table-ref/default max-tries-hash tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist max-tries-hash)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard ;; and it is clear they should have run but did not. (if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f)) (begin ~~(c~~db:tests-~~register-test runremote run-id test-name "")~~ (rmt:general-call 'register-test run-id run-id test-name "") (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done))) ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( ;; (if (member (hash-table-ref/default test-registry tfullname #f) '(DONOTRUN removed)) ;; common:cant-run-states-sym) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) (begin
︙
942 943 944 945 946 947 948 ~~949~~ 950 951 952 953 954 955 956	934 935 936 937 938 939 940 941 942 943 944 945 946 947 948	- +	#f (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) ~~(let ((can-run-more (runs:can-run-more-tests jobgroup max-concurrent-jobs)))~~ (let ((can-run-more (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs))) (if (and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records))) (if loop-list (apply loop loop-list))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns))))
︙
1048 1049 1050 1051 1052 1053 1054 ~~1055 1056~~ 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 ~~1067~~ 1068 1069 1070 ~~1071 1072~~ 1073 ~~1074~~ 1075 1076 1077 1078 1079 1080 1081	1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073	- - + + - + - - + + - +	(if (not (hash-table-ref/default test-meta-updated test-name #f)) (begin (hash-table-set! test-meta-updated test-name #t) (runs:update-test_meta test-name test-conf))) ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) ~~(test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id test-name item-path)) (testdat (if test-id (~~cdb~~:get-test-info-by-id runremote test-id) #f)))~~ (test-id (rmt:get-test-id run-id test-name item-path)) (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) (if (not testdat) (let loop () ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; ~~(if (not test-id)(set! test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id test-name item-path)))~~ (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) ~~(c~~db:tests-~~register-test runremote run-id test-name item-path) (set! test-id (~~cdb:remote-run db~~:get-test-id~~-cached #f~~ run-id test-name item-path))))~~ (rmt:general-call 'register-test run-id run-id test-name item-path) (set! test-id (rmt:get-test-id run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") ~~(set! testdat (~~cdb~~:get-test-info-by-id runremote test-id))~~ (set! testdat (rmt:get-test-info-by-id run-id test-id)) (if (not testdat) (begin (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id))
︙
1137 1138 1139 1140 1141 1142 1143 ~~1144~~ 1145 1146 1147 1148 1149 1150 1151 1152 ~~1153~~ 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 ~~1169~~ 1170 1171 1172 1173 1174 1175 1176	1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169	+ - + - + - +	(let ((skip-test #f) (skip-check (configf:get-section test-conf "skip"))) (cond ;; Have to check for skip conditions. This one skips if there are same-named tests ;; currently running ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) ;; run-ids = #f means all runs ~~(let ((running-tests (~~cdb:remote-run db~~:get-tests-for-runs-mindata #f #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))~~ (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))) (if skip-test (begin ~~(mt:test-set-state-status-by-id test-id "COMPLETED" "SKIP" skip-test)~~ (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! globalexitstatus 1) ;; (process-signal (current-process-id) signal/kill)))))))) ((KILLED) (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED)) ((LAUNCHED REMOTEHOSTSTART RUNNING) (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) (db:test-get-run_duration testdat))) 600) ;; i.e. no update for more than 600 seconds (begin (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") ~~(tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))~~ (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f)) (debug:print 2 "NOTE: " test-name " is already running"))) (else (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) (else
︙
1217 1218 1219 1220 1221 1222 1223 ~~1224~~ 1225 1226 1227 1228 1229 1230 1231	1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224	- +	;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) ~~(keys (~~cdb:remote-run db~~:get-keys db))~~ (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))) (debug:print-info 4 "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
︙
1262 1263 1264 1265 1266 1267 1268 ~~1269 1270~~ 1271 1272 1273 1274 1275 1276 1277 1278 ~~1279~~ 1280 1281 1282 1283 1284 1285 1286 ~~1287~~ 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 ~~1308~~ 1309 1310 ~~1311~~ 1312 1313 1314 1315 1316 1317 ~~1318~~ 1319 1320 1321 1322 1323 1324 1325	1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322	- - + + + + - + + + - + - + - + - +	((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) (else (debug:print-info 0 "action not recognised " action))) ~~(let ((sorted-tests (sort tests (lambda (a b)(let ((dira (db~~:test~~-get~~-rundir a))~~ ~~(dirb~~ (db:test-get-rundir b)))~~ (let ((sorted-tests (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr (db:test-get-rundir a)) ;; ) ;; (filedb:get-path fdb (db:test-get-rundir a))) (dirb ;; (rmt:sdb-qry 'getstr (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path fdb (db:test-get-rundir b)))) (if (and (string? dira)(string? dirb)) (> (string-length dira)(string-length dirb)) #f))))) (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) ~~(new-test-dat (~~cdb~~:get-test-info-by-id runremote test-id)))~~ (new-test-dat (rmt:get-test-info-by-id run-id test-id))) (if (not new-test-dat) (begin (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") (if (not (null? tal)) (loop (car tal)(cdr tal)))) (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path fdb ;; (rmt:sdb-qry 'getid ~~~~(run-dir~~ (db:test-get-rundir new-test-dat)) ;; run dir is from the link tree~~ (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f)) (test-state (db:test-get-state new-test-dat)) (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") ~~(mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)~~ (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin ~~(mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)~~ (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin ~~(mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)~~ (mt:test-set-state-status-by-id run-id (db:test-get-id test) "REMOVING" "LOCKED" #f) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir)
︙
1343 1344 1345 1346 1347 1348 1349 ~~1350~~ 1351 1352 1353 1354 ~~1355~~ 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 ~~1375~~ ~~1376 1377 1378 1379~~ 1380 1381 1382 1383 1384 1385 1386	1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381	- + - + - + - - - - + +	(debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records after removing the directory. If things fail we have a record ~~(~~cdb:remote-run db~~:delete-test-records db #f (db:test-get-id test))~~ (rmt:delete-test-records (db:test-get-run_id test)(db:test-get-id test)) (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) ~~(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)~~ (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) (let ((new-tests (proc-get-tests run-id))) (if (null? new-tests) (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") (loop (car new-tests)(cdr new-tests)))))))) ))))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") ~~(~~cdb:remote-run db~~:delete-run db run-id)~~ (rmt:delete-run run-id) ~~;; This is a pretty good place to purge old DELETED tests~~ ~~(cdb:remote-run db:delete-tests-for-run db run-id)~~ (~~cdb:remote-run db~~:delete-old-deleted-test-records db) (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds)) (rmt:delete-old-deleted-test-records) ;; (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds)) ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) ))
︙
1449 1450 1451 1452 1453 1454 1455 ~~1456~~ 1457 1458 1459 1460 1461 1462 1463 1464 ~~1465~~ 1466 1467 1468 ~~1469~~ 1470 1471 1472 1473 1474 1475 1476 1477 1478 ~~1479~~ 1480 1481 1482 1483 1484 1485 1486 1487 ~~1488~~ 1489 1490 1491 1492 1493 1494 1495 ~~1496 1497~~ 1498 1499 ~~1500~~ 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 ~~1518~~ 1519 1520 1521 1522 1523 1524 1525	1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522	- + - + - + - + - + + + - - + + - + - +	(for-each (lambda (run) (let ((run-id (db:get-value-by-header run header "id"))) (if (or lock (and unlock (begin (print "Do you really wish to unlock run " run-id "?\n y/n: ") (equal? "y" (read-line))))) ~~(~~cdb:remote-run db~~:lock/unlock-run db run-id lock unlock user)~~ (rmt:lock/unlock-run run-id lock unlock user) (debug:print-info 0 "Skipping lock/unlock on " run-id)))) runs))) ;;====================================================================== ;; Rollup runs ;;====================================================================== ;; Update the test_meta table for this test (define (runs:update-test_meta test-name test-conf) ~~(let ((currrecord (~~cdb:remote-run db~~:testmeta-get-record #f test-name)))~~ (let ((currrecord (rmt:testmeta-get-record test-name))) (if (not currrecord) (begin (set! currrecord (make-vector 11 #f)) ~~(~~cdb:remote-run db~~:testmeta-add-record #f test-name)))~~ (rmt:testmeta-add-record test-name))) (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) ~~(~~cdb:remote-run db~~:testmeta-update-field #f test-name fld val)))))~~ (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) (for-each (lambda (test-name) (let* ((test-conf (mt:lazy-read-test-config test-name))) ~~;; use the cdb:remote-run instead of passing in db~~ (if test-conf (runs:update-test_meta test-name test-conf)))) (hash-table-keys test-names)))) ;; This could probably be refactored into one complex query ... ;; NOT PORTED - DO NOT USE YET ;; (define (runs:rollup-run keys runname user keyvals) (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user) (let* ((db #f) ;; register run operates on the main db ~~(new-run-id (~~cdb:remote-run db~~:register-run #f keyvals runname "new" "n/a" user)) (prev-tests (~~cdb:remote-run tes~~t:get-matching-previous-test-run-records db new-run-id "%" "%"))~~ (new-run-id (rmt:register-run keyvals runname "new" "n/a" user)) (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) ~~(~~cdb:remote-run db~~:update-run-event_time db new-run-id)~~ (rmt:update-run-event_time new-run-id) ;; index the already saved tests by testname and itemdat in curr-tests-hash (for-each (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) (full-name (conc testname "/" item-path))) (hash-table-set! curr-tests-hash full-name testdat))) curr-tests) ;; NOPE: Non-optimal approach. Try this instead. ;; 1. tests are received in a list, most recent first ;; 2. replace the rollup test with the new always (for-each (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) (full-name (conc testname "/" item-path)) (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) ~~(test-steps (~~cdb:remote-run db~~:get-steps-for-test db (db:test-get-id testdat)))~~ (test-steps (rmt:get-steps-for-test (db:test-get-id testdat))) (new-test-record #f)) ;; replace these with insert ... select (apply sqlite3:execute db (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat)))
︙