Overview
Comment: | Added kill of -runtests processes if -remove-runs is called with test patt of % |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
36d5293a018d5658fd3d9ed5619b4fe5 |
User & Date: | matt on 2014-10-16 23:58:07 |
Other Links: | branch diff | manifest | tags |
Context
2014-10-17
| ||
00:19 | Changed default server run time to 60 seconds. Removed wait on RUNNING for servers - caused more hassle than benefit check-in: 9818a847b5 user: matt tags: v1.60 | |
2014-10-16
| ||
23:58 | Added kill of -runtests processes if -remove-runs is called with test patt of % check-in: 36d5293a01 user: matt tags: v1.60 | |
11:20 | Fixed jobgroups (added sensitivity to n/a) and num running check-in: 7ecb5e5d84 user: mrwellan tags: v1.60 | |
Changes
Modified launch.scm from [ba7f6d2131] to [acb41eb596].
︙ | ︙ | |||
511 512 513 514 515 516 517 | (begin (set! best dirpath) (set! bestsize freespc))))) (map car disks))) (if (and best (> bestsize 0)) best (begin | | | 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 | (begin (set! best dirpath) (set! bestsize freespc))))) (map car disks))) (if (and best (> bestsize 0)) best (begin (if (common:low-noise-print 20 "no valid disks") (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists!")) (exit 1))))) ;; Desired directory structure: ;; ;; <linkdir> - <target> - <testname> -. ;; | |
︙ | ︙ |
Modified runs.scm from [aabc29c272] to [0a93b5efd0].
︙ | ︙ | |||
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | (>= num-running-in-jobgroup job-group-limit)) (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) #t) (else #f)))) (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 3)) ;; test-names (let* ((keys (keys:config-get-fields *configdat*)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc *toppath* "/runconfigs.config")) (test-records (make-hash-table)) ;; need to process runconfigs before generating these lists (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) | > | > > > > > > > > > > > > > | 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 | (>= num-running-in-jobgroup job-group-limit)) (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) #t) (else #f)))) (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 3)) ;; test-names (let* ((keys (keys:config-get-fields *configdat*)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc *toppath* "/runconfigs.config")) (test-records (make-hash-table)) ;; need to process runconfigs before generating these lists (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) (tasks-db (tasks:open-db))) (set-signal-handler! signal/int (lambda (signum) (let ((tdb (tasks:open-db))) (tasks:set-state-given-param-key tdb task-key "killed") (sqlite3:finalize! tdb)) (print "Killed by sigint. Exiting") (exit))) ;; register this run in monitor.db (tasks:add tasks-db "run-tests" user target runname test-patts task-key) ;; params) (tasks:set-state-given-param-key tasks-db task-key "running") (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) ;; Now generate all the tests lists (set! all-tests-registry (tests:get-all)) |
︙ | ︙ | |||
352 353 354 355 356 357 358 | (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 "No tests to run"))) (debug:print-info 4 "All done by here") | > | | 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 | (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 "No tests to run"))) (debug:print-info 4 "All done by here") (tasks:set-state-given-param-key tasks-db task-key "done") (sqlite3:finalize! tasks-db))) ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. ;; ;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns ;; If reg is full (i.e. length >= n ;; loop with (car reg) tal (cdr reg) reruns |
︙ | ︙ | |||
1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 | ;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(remove-data-only #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))) | > | 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 | ;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(remove-data-only #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) (tasks-db (tasks:open-db)) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))) |
︙ | ︙ | |||
1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 | testpatt states statuses not-in: #f sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")) (debug:print-info 4 "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action ((remove-runs) (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) | > > > > > | 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 | testpatt states statuses not-in: #f sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")) (debug:print-info 4 "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action ((remove-runs) ;; seek and kill in flight -runtests with % as testpatt here (if (equal? testpatt "%") (tasks:kill-runner tasks-db target run-name) (debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) |
︙ | ︙ | |||
1496 1497 1498 1499 1500 1501 1502 | ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) )) | | > | 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 | ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) )) runs) (sqlite3:finalize! tasks-db)) #t) (define (runs:remove-test-directory db test remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) (resolve-pathname run-dir) #f))) |
︙ | ︙ |
Modified tasks.scm from [e478443442] to [3f21ed396f].
︙ | ︙ | |||
387 388 389 390 391 392 393 | "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" (car (user-information (current-user-id)))) res)) ;; register a task (define (tasks:add mdb action owner target runname testpatt params) (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) | | | < | 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 | "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" (car (user-information (current-user-id)))) res)) ;; register a task (define (tasks:add mdb action owner target runname testpatt params) (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" action owner target runname testpatt (if params params ""))) (define (keys:key-vals-hash->target keys key-params) (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) (if (> (length keys) 1) (for-each (lambda (key) (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) |
︙ | ︙ | |||
575 576 577 578 579 580 581 582 583 584 585 586 587 588 | (current-process-id) (get-host-name))) (define (tasks:set-state mdb task-id state) (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" state task-id)) ;;====================================================================== ;; The routines to process tasks ;;====================================================================== ;; NOTE: It might be good to add one more layer of checking to ensure ;; that no task gets run in parallel. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 | (current-process-id) (get-host-name))) (define (tasks:set-state mdb task-id state) (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" state task-id)) ;;====================================================================== ;; Access using task key (stored in params; (hash-table->alist flags) hostname pid ;;====================================================================== (define (tasks:param-key->id mdb task-params) (handle-exceptions exn #f (sqlite3:first-result mdb "SELECT id FROM tasks_queue WHERE params LIKE ?;" task-params))) (define (tasks:set-state-given-param-key mdb param-key new-state) (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key)) (define (tasks:get-records-given-param-key mdb param-key state-patt action-patt test-patt) (handle-exceptions exn '() (sqlite3:first-row mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" param-key state-patt action-patt test-patt))) ;;====================================================================== ;; Rogue items, no place to put these yet ;;====================================================================== (define (tasks:find-task-queue-records mdb target run-name test-patt state-patt action-patt) ;; (handle-exceptions ;; exn ;; '() ;; (sqlite3:first-row (let ((res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (cons a b) res))) mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue WHERE target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" target run-name state-patt action-patt test-patt) res)) ;; ) (define (tasks:kill-runner mdb target run-name) (let ((records (tasks:find-task-queue-records mdb target run-name "%" "running" "run-tests")) (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string (if (null? records) (debug:print 0 "No run launching processes found for " target " / " run-name) (debug:print 0 "Found " (length records) " run(s) to kill.")) (for-each (lambda (record) (let* ((param-key (list-ref record 8)) (match-dat (string-search hostpid-rx param-key)) (hostname (cadr match-dat)) (pid (caddr match-dat))) (debug:print 0 "Sending SIGINT to process " pid " on host " hostname) (if (equal? (get-host-name) hostname) (process-signal (string->number pid) signal/int) ;; (call-with-environment-variables (let ((old-targethost (getenv "TARGETHOST"))) (set-environment-variable "TARGETHOST" hostname) (system (conc "nbfake " kill " " pid)) (if old-targethost (set-environment-variable "TARGETHOST" old-targethost)))))) records))) ;;====================================================================== ;; The routines to process tasks ;;====================================================================== ;; NOTE: It might be good to add one more layer of checking to ensure ;; that no task gets run in parallel. |
︙ | ︙ |
Modified tests/fdktestqa/fdk.config from [e76cce79f3] to [bb2780b886].
︙ | ︙ | |||
17 18 19 20 21 22 23 | [jobtools] maxload 4 launcher nbfake [server] # timeout 0.01 # homehost xena | | > > > > | | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | [jobtools] maxload 4 launcher nbfake [server] # timeout 0.01 # homehost xena # homehost 143.182.225.38 # force server server-query-threshold 0 [jobtools] # launcher nbq -P ch_vp -C SLES11_EM64T_4G -Q /ciaf/fdk # launcher nbfake # maxload 4 |