︙ | | |
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
-
+
-
-
+
+
|
(currtime (current-seconds)))
(if (> (- currtime lasttime) waitval)
(begin
(hash-table-set! *runs:denoise* key currtime)
#t)
#f)))
(define (runs:can-run-more-tests jobgroup max-concurrent-jobs)
(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)
(thread-sleep! (cond
((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
(else 0)))
(let* ((num-running (rmt:get-count-tests-running))
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup jobgroup))
(let* ((num-running (rmt:get-count-tests-running run-id))
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
(job-group-limit (config-lookup *configdat* "jobgroups" jobgroup)))
(if (> (+ num-running num-running-in-jobgroup) 0)
(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
(if (not (eq? *last-num-running-tests* num-running))
(begin
(debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
(set! *last-num-running-tests* num-running)))
|
︙ | | |
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
|
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
|
-
+
|
'()
reg)))
(define runs:nothing-left-in-queue-count 0)
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records)
(let* ((loop-list (list hed tal reg reruns))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path mode: testmode)) ;; (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode))
(fails (runs:calc-fails prereqs-not-met))
(non-completed (runs:calc-not-completed prereqs-not-met)))
(debug:print-info 4 "START OF INNER COND #2 "
"\n can-run-more: " can-run-more
"\n testname: " hed
"\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
"\n non-completed: " (runs:pretty-string non-completed)
|
︙ | | |
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
|
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
|
-
+
-
+
|
((string? t)
t)
(else
(conc t))))
inlst))
(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry)
(let* ((run-limits-info (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
(let* ((run-limits-info (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
(have-resources (car run-limits-info))
(num-running (list-ref run-limits-info 1))
(num-running-in-jobgroup (list-ref run-limits-info 2))
(max-concurrent-jobs (list-ref run-limits-info 3))
(job-group-limit (list-ref run-limits-info 4))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path mode: testmode)) ;; (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode))
(fails (runs:calc-fails prereqs-not-met))
(non-completed (runs:calc-not-completed prereqs-not-met))
(loop-list (list hed tal reg reruns)))
(debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: ("
(string-intersperse
(map (lambda (t)
(if (vector? t)
|
︙ | | |
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
|
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
|
-
+
-
-
+
+
|
;; Register tests
;;
((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f))
(debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
(if (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
(begin
(rmt:general-call 'register-test run-id test-name item-path)
(rmt:general-call 'register-test run-id run-id test-name item-path)
(hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done))
(let ((th (make-thread (lambda ()
(mutex-lock! registry-mutex)
(hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
(mutex-unlock! registry-mutex)
;; If haven't done it before register a top level test if this is an itemized test
(if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done))
(rmt:general-call 'register-test run-id test-name ""))
(rmt:general-call 'register-test run-id test-name item-path)
(rmt:general-call 'register-test run-id run-id test-name ""))
(rmt:general-call 'register-test run-id run-id test-name item-path)
(mutex-lock! registry-mutex)
(hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)
(mutex-unlock! registry-mutex))
(conc test-name "/" item-path))))
(thread-start! th)))
(runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
(if (and (null? tal)(null? reg))
|
︙ | | |
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
|
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
|
-
+
|
(hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))
;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
;; and it is clear they *should* have run but did not.
(if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f))
(begin
(rmt:general-call 'register-test run-id test-name "")
(rmt:general-call 'register-test run-id run-id test-name "")
(hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done)))
;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
;;
(if (member (hash-table-ref/default test-registry tfullname #f)
'(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
(begin
|
︙ | | |
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
|
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
|
-
+
|
#f
(loop (car tal)(cdr tal) reg reruns)))
;; if items is a proc then need to run items:get-items-from-config, get the list and loop
;; - but only do that if resources exist to kick off the job
;; EXPAND ITEMS
((or (procedure? items)(eq? items 'have-procedure))
(let ((can-run-more (runs:can-run-more-tests jobgroup max-concurrent-jobs)))
(let ((can-run-more (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)))
(if (and (list? can-run-more)
(car can-run-more))
(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records)))
(if loop-list
(apply loop loop-list)))
;; if can't run more just loop with next possible test
(loop (car newtal)(cdr newtal) reg reruns))))
|
︙ | | |
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
|
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
|
-
+
-
+
-
+
|
(begin
(hash-table-set! *test-meta-updated* test-name #t)
(runs:update-test_meta test-name test-conf)))
;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
(let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
(test-id (rmt:get-test-id run-id test-name item-path))
(testdat (if test-id (rmt:get-test-info-by-id test-id) #f)))
(testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f)))
(if (not testdat)
(let loop ()
;; ensure that the path exists before registering the test
;; NOPE: Cannot! Don't know yet which disk area will be assigned....
;; (system (conc "mkdir -p " new-test-path))
;;
;; (open-run-close tests:register-test db run-id test-name item-path)
;;
;; NB// for the above line. I want the test to be registered long before this routine gets called!
;;
(if (not test-id)(set! test-id (rmt:get-test-id-cached run-id test-name item-path)))
(if (not test-id)
(begin
(debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
(rmt:general-call 'register-test run-id test-name item-path)
(rmt:general-call 'register-test run-id run-id test-name item-path)
(set! test-id (rmt:get-test-id run-id test-name item-path))))
(debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
(set! testdat (rmt:get-test-info-by-id test-id))
(set! testdat (rmt:get-test-info-by-id run-id test-id))
(if (not testdat)
(begin
(debug:print-info 0 "WARNING: server is overloaded, trying again in one second")
(thread-sleep! 1)
(loop)))))
(if (not testdat) ;; should NOT happen
(debug:print 0 "ERROR: failed to get test record for test-id " test-id))
|
︙ | | |
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
|
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
|
-
+
|
(set! skip-test "Skipping due to previous tests running"))))
((and skip-check
(configf:lookup test-conf "skip" "fileexists"))
(if (file-exists? (configf:lookup test-conf "skip" "fileexists"))
(set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))))
(if skip-test
(begin
(mt:test-set-state-status-by-id test-id "COMPLETED" "SKIP" skip-test)
(mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
(debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test))
(if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
(begin
(print "ERROR: Failed to launch the test. Exiting as soon as possible")
(set! *globalexitstatus* 1) ;;
(process-signal (current-process-id) signal/kill))))))))
((KILLED)
|
︙ | | |
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
|
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
|
-
+
|
(> (string-length dira)(string-length dirb))
#f)))))
(test-retry-time (make-hash-table))
(allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em
(let loop ((test (car sorted-tests))
(tal (cdr sorted-tests)))
(let* ((test-id (db:test-get-id test))
(new-test-dat (rmt:get-test-info-by-id test-id)))
(new-test-dat (rmt:get-test-info-by-id run-id test-id)))
(if (not new-test-dat)
(begin
(debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
(if (not (null? tal))
(loop (car tal)(cdr tal))))
(let* ((item-path (db:test-get-item-path new-test-dat))
(test-name (db:test-get-testname new-test-dat))
|
︙ | | |
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
|
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
|
-
+
-
+
-
+
|
(hash-table-set! test-retry-time test-fulln (current-seconds))))
(if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
;; up and blow it away.
(begin
(debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
(mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
(mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
(thread-sleep! 1))
(begin
(mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)
(mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
(thread-sleep! 1)))
;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
(if (null? tal)
(loop new-test-dat tal)
(loop (car tal)(append tal (list new-test-dat)))))
(begin
(mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
(mt:test-set-state-status-by-id run-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
(debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
(if (and real-dir
(> (string-length real-dir) 5)
(file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
(begin ;; let* ((realpath (resolve-pathname run-dir)))
(debug:print-info 1 "Recursively removing " real-dir)
(if (file-exists? real-dir)
|
︙ | | |
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
|
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
|
-
+
|
))
;; Only delete the records *after* removing the directory. If things fail we have a record
(rmt:delete-test-records (db:test-get-id test))
(if (not (null? tal))
(loop (car tal)(cdr tal))))))
((set-state-status)
(debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
(mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
(if (not (null? tal))
(loop (car tal)(cdr tal))))
((run-wait)
(debug:print-info 2 "still waiting, " (length tests) " tests still running")
(thread-sleep! 10)
(let ((new-tests (proc-get-tests run-id)))
(if (null? new-tests)
|
︙ | | |