Overview
Comment: | Merged 31c3 from v1.55 into v1.60 and fixed a couple of compile issues |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: | 7383eb0df296776e15095ed546e62c9d |
User & Date: | mrwellan on 2014-06-02 13:33:40 |
Other Links: | branch diff | manifest | tags |
Context
2014-06-02
| ||
13:38 | Merged f2d7 from v1.55 to v1.60 check-in: e30eb474c8 user: mrwellan tags: v1.60 | |
13:33 | Merged 31c3 from v1.55 into v1.60 and fixed a couple of compile issues check-in: 7383eb0df2 user: mrwellan tags: v1.60 | |
11:21 | Merged 0f5d from v1.55 to v1.60 check-in: ce8b9e0b55 user: mrwellan tags: v1.60 | |
2014-03-28
| ||
11:45 | Fixed missing call to set state/status correctly on killreq check-in: 31c35bf056 user: mrwellan tags: v1.55 | |
Changes
Modified db.scm from [b980dbcc63] to [c98f61a5ff].
︙ | ︙ | |||
1990 1991 1992 1993 1994 1995 1996 | (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) ;; this test is younger, store it in the hash (hash-table-set! tests-hash full-testname testdat)))) results) (if (null? tal) (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests (loop (car tal)(cdr tal)))))))))) | | | | | | | | | | | | | | | | | | | | | | | | | | | 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 | (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) ;; this test is younger, store it in the hash (hash-table-set! tests-hash full-testname testdat)))) results) (if (null? tal) (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests (loop (car tal)(cdr tal)))))))))) ;; (let* ((remtries 10) ;; (proc #f)) ;; (set! proc (lambda (remtries) ;; (if (> remtries 0) ;; (handle-exceptions ;; exn ;; (let ((sleep-time (random 30)) ;; (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; (case err-status ;; ((busy) ;; (thread-sleep! sleep-time) ;; (proc 10)) ;; we never give up on busy ;; (else ;; (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") ;; (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) ;; (print-call-chain) ;; (debug:print 0 "Sleeping for " sleep-time) ;; (thread-sleep! sleep-time) ;; (debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up") ;; (proc (- remtries 1))))) ;; (apply sqlite3:execute db query params)) ;; (debug:print 0 "ERROR: too many attempts to access db were made and no sucess. 
query: " ;; query ", params: " params)))) ;; (proc remtries)) (define (db:test-get-records-for-index-file dbstruct run-id test-name) (let ((res '())) (sqlite3:for-each-row (lambda (id itempath state status run_duration logf-id comment-id) (let ((logf (db:get-string dbstruct logf-id)) (comment (db:get-string dbstruct comment-id))) |
︙ | ︙ |
Modified launch.scm from [dd204b865e] to [e833897f83].
︙ | ︙ | |||
328 329 330 331 332 333 334 | (begin (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this ;; section and the runit section? Or add a loop that tries three times with a 1/4 second ;; between tries? (let* ((pid (vector-ref exit-info 0))) (if (number? pid) | > > > | | | | | | | | | | | | | > | | | > | | | 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 | (begin (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this ;; section and the runit section? Or add a loop that tries three times with a 1/4 second ;; between tries? (let* ((pid (vector-ref exit-info 0))) (if (number? pid) (handle-exceptions exn (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.") ;;(process-signal pid signal/kill)) (begin (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")") (let ((processes (cmd-run->list (conc "pgrep -l -P " pid)))) (for-each (lambda (p) (let* ((parts (string-split p)) (p-id (if (> (length parts) 0) (string->number (car parts)) #f))) (if p-id (begin (debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id) ;; (process-signal pid signal/kill))))) ;; (system (conc "kill -9 " p-id)))))) (car processes))) (system (conc "kill -9 -" pid)) (tests:test-set-status! test-id "KILLED" "FAIL" (args:get-arg "-m") #f))) (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") ;; (tests:test-set-status! run-id test-id "KILLED" "FAIL" (tests:test-set-status! run-id test-id "KILLED" "FAIL" (args:get-arg "-m") #f) (exit 1) ;; IS THIS NECESSARY OR WISE??? ))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) (if keep-going (begin (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses |
︙ | ︙ |