Overview
Comment: | full fs access support and megatest.db turned off by default |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
c8926e3cff337af293928bd1bb2025ae |
User & Date: | matt on 2014-11-20 21:44:37 |
Other Links: | branch diff | manifest | tags |
Context
2014-11-20
| ||
22:20 | full fs access support and megatest.db turned off by default check-in: 84336a6b8f user: matt tags: v1.60 | |
21:44 | full fs access support and megatest.db turned off by default check-in: c8926e3cff user: matt tags: v1.60 | |
2014-11-19
| ||
14:36 | Fixed bugs in exit handling check-in: bd18160690 user: mrwellan tags: v1.60 | |
Changes
Modified megatest.scm from [b300543c22] to [16af3583d8].
︙ | ︙ | |||
285 286 287 288 289 290 291 | ;; The watchdog is to keep an eye on things like db sync etc. ;; (define *watchdog* (make-thread (lambda () (thread-sleep! 0.05) ;; delay for startup | > | | | | | | | | > | | | | | | | | | | | | | | | | | | | | | | 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | ;; The watchdog is to keep an eye on things like db sync etc. ;; (define *watchdog* (make-thread (lambda () (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db"))) (let loop () ;; sync for filesystem local db writes ;; (let ((start-time (current-seconds)) (servers-started (make-hash-table))) (for-each (lambda (run-id) (mutex-lock! *db-multi-sync-mutex*) (if (and legacy-sync (hash-table-ref/default *db-local-sync* run-id #f)) ;; (if (> (- start-time last-write) 5) ;; every five seconds (begin ;; let ((sync-time (- (current-seconds) start-time))) (db:multi-db-sync (list run-id) 'new2old) (if (common:low-noise-print 30 "sync new to old") (let ((sync-time (- (current-seconds) start-time))) (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run ;; (begin ;; (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id) ;; (server:kind-run run-id))))) (hash-table-delete! *db-local-sync* run-id))) (mutex-unlock! *db-multi-sync-mutex*)) (hash-table-keys *db-local-sync*))) ;; keep going unless time to exit ;; (if (not *time-to-exit*) (begin (thread-sleep! 1) ;; wait one second before syncing again (loop))))) "Watchdog thread"))) (thread-start! *watchdog*) (if (args:get-arg "-log") (let ((oup (open-output-file (args:get-arg "-log")))) (debug:print-info 0 "Sending log output to " (args:get-arg "-log")) (current-error-port oup) |
︙ | ︙ |
Modified rmt.scm from [714450135c] to [a4cf4136f4].
︙ | ︙ | |||
108 109 110 111 112 113 114 | ;; no longer killing the server in http-transport:client-api-send-receive ;; may kill it here but what are the criteria? ;; start with three calls then kill server (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))) | < | | | < | < | < < < < < < < < < < | | 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 | ;; no longer killing the server in http-transport:client-api-send-receive ;; may kill it here but what are the criteria? ;; start with three calls then kill server (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))) (if (and (< attemptnum 10) (tasks:need-server run-id)) (begin (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) (rmt:send-receive cmd rid params (+ attemptnum 1))) (rmt:open-qry-close-locally cmd run-id params))))) (define (rmt:update-db-stats run-id rawcmd params duration) (mutex-lock! *db-stats-mutex*) (handle-exceptions exn (begin (debug:print 0 "WARNING: stats collection failed in update-db-stats") |
︙ | ︙ |
Modified runs.scm from [380bd479a2] to [6ed325fc14].
︙ | ︙ | |||
214 215 216 217 218 219 220 | (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) (tdbdat (tasks:open-db))) | | | 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) (tdbdat (tasks:open-db))) (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (set-signal-handler! signal/int (lambda (signum) (signal-mask! signum) (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((tdbdat (tasks:open-db))) (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "killed")) |
︙ | ︙ | |||
933 934 935 936 937 938 939 | (item-path (item-list->path itemdat)) (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) (num-running (rmt:get-count-tests-running-for-run-id run-id))) ;; every couple minutes verify the server is there for this run | | > | | | 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 | (item-path (item-list->path itemdat)) (tfullname (runs:make-full-test-name test-name item-path)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) (num-running (rmt:get-count-tests-running-for-run-id run-id))) ;; every couple minutes verify the server is there for this run (if (and (common:low-noise-print 60 "try start server" run-id) (tasks:need-server run-id)) (tasks:start-and-wait-for-server tdbdat run-id 10)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running 240)) (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) ;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*)) ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard |
︙ | ︙ | |||
1431 1432 1433 1434 1435 1436 1437 | '())) (lasttpath "/does/not/exist/I/hope")) (debug:print-info 4 "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action ((remove-runs) | | | | 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 | '())) (lasttpath "/does/not/exist/I/hope")) (debug:print-info 4 "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action ((remove-runs) (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) ;; seek and kill in flight -runtests with % as testpatt here (if (equal? testpatt "%") (tasks:kill-runner (db:delay-if-busy tdbdat) target run-name) (debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) (else |
︙ | ︙ |
Modified tasks.scm from [a0ed950b28] to [7c5174d4f3].
︙ | ︙ | |||
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 | (let ((res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) mdb ;; NEEDS dbprep ADDED "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) res)) ;; try to start a server and wait for it to be available ;; (define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries) ;; ensure a server is running for this run (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)) (delay-time 0)) (if (and (not server-dat) (< delay-time delay-max-tries)) (begin | > > > > > > > > > > > > > > > > | > | 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | (let ((res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) mdb ;; NEEDS dbprep ADDED "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) res)) (define (tasks:need-server run-id) (let ((forced (configf:lookup *configdat* "server" "required")) (maxqry (cdr (rmt:get-max-query-average run-id))) (threshold (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10")))) (cond (forced (if (common:low-noise-print 60 run-id "server required is set") (debug:print-info 0 "Server required is set, starting server.")) #t) ((> maxqry threshold) (if (common:low-noise-print 60 run-id "Max query time execeeded") (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, starting server.")) #t) (else #f)))) ;; try to start a server and wait for it to be available ;; (define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries) ;; ensure a server is running for this run (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)) (delay-time 0)) (if (and (not server-dat) (< delay-time delay-max-tries)) (begin (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) (debug:print 0 "Try starting server for run-id " run-id)) (server:kind-run run-id) (thread-sleep! (min delay-time 5)) (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)))))) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row |
︙ | ︙ |
Modified tests/fullrun/megatest.config from [6b64710311] to [79e8e68f6b].
︙ | ︙ | |||
137 138 139 140 141 142 143 | # timeout 0.025 timeout 0.01 # Server is required - slower but more resistant to Sqlite issues. # required yes # Start server when average query takes longer than this | | | 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | # timeout 0.025 timeout 0.01 # Server is required - slower but more resistant to Sqlite issues. # required yes # Start server when average query takes longer than this server-query-threshold 55500 # daemonize yes # hostname #{scheme (get-host-name)} ## disks are: ## name host:/path/to/area ## -or- |
︙ | ︙ |