︙ | | | ︙ | |
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
|
(set-signal-handler! signal/term sighand))
;; force the starting of a server
(debug:print 0 *default-log-port* "waiting on server...")
(server:start-and-wait *toppath*)
(runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
(set! runconf (if (file-exists? runconfigf)
(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
(begin
(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
#f)))
(if (not test-patts) ;; first time in - adjust testpatt
(set! test-patts (common:args-get-testpatt runconf)))
|
|
|
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
|
(set-signal-handler! signal/term sighand))
;; force the starting of a server
(debug:print 0 *default-log-port* "waiting on server...")
(server:start-and-wait *toppath*)
(runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
(set! runconf (if (common:file-exists? runconfigf)
(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
(begin
(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
#f)))
(if (not test-patts) ;; first time in - adjust testpatt
(set! test-patts (common:args-get-testpatt runconf)))
|
︙ | | | ︙ | |
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
|
;; NOTE: these are all parent tests, items are not expanded yet.
(debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
(let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
(if (> (length (hash-table-keys test-records)) 0)
(let* ((keep-going #t)
(run-queue-retries 5)
(th1 (make-thread (lambda ()
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))
;; (handle-exceptions
;; exn
;; (begin
;; (print-call-chain (current-error-port))
;; (debug:print-error 0 *default-log-port* "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))
;; (if (> run-queue-retries 0)
;; (begin
|
>
>
>
>
>
|
>
|
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
|
;; NOTE: these are all parent tests, items are not expanded yet.
(debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
(let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
(if (> (length (hash-table-keys test-records)) 0)
(let* ((keep-going #t)
(run-queue-retries 5)
(th1 (make-thread (lambda ()
(handle-exceptions
exn
(begin
(print-call-chain)
(print " message: " ((condition-property-accessor 'exn 'message) exn)))
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
(any->number reglen) all-tests-registry)))
;; (handle-exceptions
;; exn
;; (begin
;; (print-call-chain (current-error-port))
;; (debug:print-error 0 *default-log-port* "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))
;; (if (> run-queue-retries 0)
;; (begin
|
︙ | | | ︙ | |
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
|
'())))
(non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed!
(not (equal? x hed)))
(runs:calc-not-completed prereqs-not-met)))
(loop-list (list hed tal reg reruns))
;; configure the load runner
(numcpus (common:get-num-cpus #f))
(maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
(waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
(debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: ("
(string-intersperse
(map (lambda (t)
(if (vector? t)
(conc (db:test-get-state t) "/" (db:test-get-status t))
(conc " WARNING: t is not a vector=" t )))
prereqs-not-met)
|
|
>
|
|
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
|
'())))
(non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed!
(not (equal? x hed)))
(runs:calc-not-completed prereqs-not-met)))
(loop-list (list hed tal reg reruns))
;; configure the load runner
(numcpus (common:get-num-cpus #f))
(maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable
(maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
(waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
(debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: ("
(string-intersperse
(map (lambda (t)
(if (vector? t)
(conc (db:test-get-state t) "/" (db:test-get-status t))
(conc " WARNING: t is not a vector=" t )))
prereqs-not-met)
|
︙ | | | ︙ | |
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
|
(not (member 'exclusive testmode)))))
;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
;; we are going to reset all the counters for test retries by setting a new hash table
;; this means they will increment only when nothing can be run
(set! *max-tries-hash* (make-hash-table))
;; well, first lets see if cpu load throttling is enabled. If so wait around until the
;; average cpu load is under the threshold before continuing
(if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
(common:wait-for-cpuload maxload numcpus waitdelay))
(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
(runs:incremental-print-results run-id)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
(runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
;; (thread-sleep! *global-delta*)
(if (or (not (null? tal))(not (null? reg)))
(list (runs:queue-next-hed tal reg reglen regfull)
|
|
|
>
>
>
|
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
|
(not (member 'exclusive testmode)))))
;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
;; we are going to reset all the counters for test retries by setting a new hash table
;; this means they will increment only when nothing can be run
(set! *max-tries-hash* (make-hash-table))
;; well, first lets see if cpu load throttling is enabled. If so wait around until the
;; average cpu load is under the threshold before continuing
(if maxload ;; only gate if maxload is specified
(common:wait-for-cpuload maxload numcpus waitdelay))
(if maxhomehostload
(common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
(runs:incremental-print-results run-id)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
(runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
;; (thread-sleep! *global-delta*)
(if (or (not (null? tal))(not (null? reg)))
(list (runs:queue-next-hed tal reg reglen regfull)
|
︙ | | | ︙ | |
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
|
itemmaps: itemmaps
;; prereqs-not-met: prereqs-not-met
)))
(runs:dat-regfull-set! runsdat regfull)
;; every 15 minutes verify the server is there for this run
(if (and (common:low-noise-print 240 "try start server" run-id)
(not (server:check-if-running *toppath*)))
(server:kind-run *toppath*))
(if (> num-running 0)
(set! last-time-some-running (current-seconds)))
(if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
(hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
|
>
>
>
|
|
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
|
itemmaps: itemmaps
;; prereqs-not-met: prereqs-not-met
)))
(runs:dat-regfull-set! runsdat regfull)
;; every 15 minutes verify the server is there for this run
(if (and (common:low-noise-print 240 "try start server" run-id)
(not (or (and *runremote*
(remote-server-url *runremote*)
(server:ping (remote-server-url *runremote*)))
(server:check-if-running *toppath*))))
(server:kind-run *toppath*))
(if (> num-running 0)
(set! last-time-some-running (current-seconds)))
(if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
(hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
|
︙ | | | ︙ | |
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
|
(begin
(debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second")
(thread-sleep! 1)
(loop)))))
(if (not testdat) ;; should NOT happen
(debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
(set! test-id (db:test-get-id testdat))
(if (file-exists? test-path)
(change-directory test-path)
(begin
(debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
(change-directory *toppath*)))
(case (if force ;; (args:get-arg "-force")
'NOT_STARTED
(if testdat
|
|
|
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
|
(begin
(debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second")
(thread-sleep! 1)
(loop)))))
(if (not testdat) ;; should NOT happen
(debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
(set! test-id (db:test-get-id testdat))
(if (common:file-exists? test-path)
(change-directory test-path)
(begin
(debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
(change-directory *toppath*)))
(case (if force ;; (args:get-arg "-force")
'NOT_STARTED
(if testdat
|
︙ | | | ︙ | |
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
|
(debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
(loop (car new-tests)(cdr new-tests)))))
((archive)
(if (and run-dir (not toplevel-with-children))
(let ((ddir (conc run-dir "/")))
(case (string->symbol (args:get-arg "-archive"))
((save save-remove keep-html)
(if (file-exists? ddir)
(debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
(if (not (null? tal))
(loop (car tal)(cdr tal))))
)))
)
(if worker-thread (thread-join! worker-thread))))))
;; remove the run if zero tests remain
(if (eq? action 'remove-runs)
(let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
(if (null? remtests) ;; no more tests remaining
(let* ((dparts (string-split lasttpath "/"))
(runpath (conc "/" (string-intersperse
(take dparts (- (length dparts) 1))
"/"))))
(debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
(rmt:delete-run run-id)
|
|
>
|
|
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
|
(debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
(loop (car new-tests)(cdr new-tests)))))
((archive)
(if (and run-dir (not toplevel-with-children))
(let ((ddir (conc run-dir "/")))
(case (string->symbol (args:get-arg "-archive"))
((save save-remove keep-html)
(if (common:file-exists? ddir)
(debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
(if (not (null? tal))
(loop (car tal)(cdr tal))))
)))
)
(if worker-thread (thread-join! worker-thread))))))
;; remove the run if zero tests remain
(if (eq? action 'remove-runs)
(let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
(remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
(if (null? remtests) ;; no more tests remaining
(let* ((dparts (string-split lasttpath "/"))
(runpath (conc "/" (string-intersperse
(take dparts (- (length dparts) 1))
"/"))))
(debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
(rmt:delete-run run-id)
|
︙ | | | ︙ | |
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
|
runs)
;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
)
#t)
(define (runs:remove-test-directory test mode) ;; remove-data-only)
(let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree
(real-dir (if (file-exists? run-dir)
;; (resolve-pathname run-dir)
(common:nice-path run-dir)
#f)))
(case mode
((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
(debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
(if (and real-dir
(> (string-length real-dir) 5)
(file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
(begin ;; let* ((realpath (resolve-pathname run-dir)))
(debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
(if (file-exists? real-dir)
(runs:safe-delete-test-dir real-dir)
(debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
(if real-dir
(debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
(debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
(if (symbolic-link? run-dir)
(begin
|
|
|
|
|
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
|
runs)
;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
)
#t)
(define (runs:remove-test-directory test mode) ;; remove-data-only)
(let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree
(real-dir (if (common:file-exists? run-dir)
;; (resolve-pathname run-dir)
(common:nice-path run-dir)
#f)))
(case mode
((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
(debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
(if (and real-dir
(> (string-length real-dir) 5)
(common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
(begin ;; let* ((realpath (resolve-pathname run-dir)))
(debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
(if (common:file-exists? real-dir)
(runs:safe-delete-test-dir real-dir)
(debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
(if real-dir
(debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
(debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
(if (symbolic-link? run-dir)
(begin
|
︙ | | | ︙ | |
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
|
(exit 3))
(else
(let (;; (db #f)
(keys #f))
(if (launch:setup)
(begin
(full-runconfigs-read) ;; cache the run config
(launch:cache-config)) ;; do not cache here - need to be sure runconfigs is processed
(begin
(debug:print 0 *default-log-port* "Failed to setup, exiting")
(exit 1)))
(set! keys (keys:config-get-fields *configdat*))
;; have enough to process -target or -reqtarg here
|
>
|
|
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
|
(exit 3))
(else
(let (;; (db #f)
(keys #f))
(if (launch:setup)
(begin
(full-runconfigs-read) ;; cache the run config
;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
) ;; do not cache here - need to be sure runconfigs is processed
(begin
(debug:print 0 *default-log-port* "Failed to setup, exiting")
(exit 1)))
(set! keys (keys:config-get-fields *configdat*))
;; have enough to process -target or -reqtarg here
|
︙ | | | ︙ | |
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
|
;; clean cache files
(define (runs:clean-cache target runname toppath)
(if target
(if runname
(let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
(runtop (conc linktree "/" target "/" runname))
(files (if (file-exists? runtop)
(append (glob (conc runtop "/.megatest*"))
(glob (conc runtop "/.runconfig*")))
'())))
(if (null? files)
(debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
(begin
(debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n "))
|
|
|
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
|
;; clean cache files
(define (runs:clean-cache target runname toppath)
(if target
(if runname
(let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
(runtop (conc linktree "/" target "/" runname))
(files (if (common:file-exists? runtop)
(append (glob (conc runtop "/.megatest*"))
(glob (conc runtop "/.runconfig*")))
'())))
(if (null? files)
(debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
(begin
(debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n "))
|
︙ | | | ︙ | |