Overview
Comment: | Better error reporting (use stderr), faster registration of tests, bug fix in tasks path to db handling |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
3757f74464111e176863209a126d027b |
User & Date: | matt on 2014-11-16 20:02:13 |
Other Links: | branch diff | manifest | tags |
Context
2014-11-16
| ||
20:40 | Use required mode by default in QA - for now check-in: 91923e2ee5 user: matt tags: v1.60 | |
20:02 | Better error reporting (use stderr), faster registration of tests, bug fix in tasks path to db handling check-in: 3757f74464 user: matt tags: v1.60 | |
17:37 | Added channel for result codes to http communication. Fixed some unit tests check-in: 98ffd649ba user: matt tags: v1.60 | |
Changes
Modified common.scm from [fba3cc219f] to [afd51ef265].
︙ | ︙ | |||
158 159 160 161 162 163 164 | (define (common:read-encoded-string instr) (handle-exceptions exn (handle-exceptions exn (begin (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn)) | | | 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 | (define (common:read-encoded-string instr) (handle-exceptions exn (handle-exceptions exn (begin (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== |
︙ | ︙ |
Modified datashare.scm from [b4dde544c7] to [2abd8aec1c].
︙ | ︙ | |||
253 254 255 256 257 258 259 | ((busy) (thread-sleep! sleep-time)) (else (print "EXCEPTION: database overloaded or unreadable.") (print " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (print " status: " ((condition-property-accessor 'sqlite3 'status) exn)) | | | 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 | ((busy) (thread-sleep! sleep-time)) (else (print "EXCEPTION: database overloaded or unreadable.") (print " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (print " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port)) (thread-sleep! sleep-time) (print "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) (apply open-run-close-exception-handling proc idb params)) (apply open-run-close-no-exception-handling proc idb params))) (define (open-run-close-no-exception-handling proc idb . params) ;; (print "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) |
︙ | ︙ |
Modified db.scm from [15f2be2e7b] to [658953f833].
︙ | ︙ | |||
327 328 329 330 331 332 333 | (sqlite3:database? rundb)) (handle-exceptions exn (begin (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " db: " rundb) | | | 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 | (sqlite3:database? rundb)) (handle-exceptions exn (begin (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " db: " rundb) (print-call-chain (current-error-port)) #f) (sqlite3:interrupt! rundb) (sqlite3:finalize! rundb #t)))) ;; (mutex-unlock! *db-sync-mutex*) ) (define (db:open-inmem-db) |
︙ | ︙ | |||
434 435 436 437 438 439 440 | (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (debug:print 0 " src db: " (db:dbdat-get-path fromdb)) (for-each (lambda (dbdat) (debug:print 0 " dbpath: " (db:dbdat-get-path dbdat))) (cons todb slave-dbs)) | | | 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 | (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (debug:print 0 " src db: " (db:dbdat-get-path fromdb)) (for-each (lambda (dbdat) (debug:print 0 " dbpath: " (db:dbdat-get-path dbdat))) (cons todb slave-dbs)) (print-call-chain (current-error-port))) (cond ((not fromdb) (debug:print 3 "WARNING: db:sync-tables called with fromdb missing") -1) ((not todb) (debug:print 3 "WARNING: db:sync-tables called with todb missing") -2) ((not (sqlite3:database? (db:dbdat-get-db fromdb))) (debug:print 0 "ERROR: db:sync-tables called with fromdb not a database " fromdb) -3) ((not (sqlite3:database? (db:dbdat-get-db todb))) (debug:print 0 "ERROR: db:sync-tables called with todb not a database " todb) -4) |
︙ | ︙ | |||
647 648 649 650 651 652 653 | ((busy) (thread-sleep! sleep-time)) (else (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) | | | 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 | ((busy) (thread-sleep! sleep-time)) (else (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port)) (thread-sleep! sleep-time) (debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) (apply open-run-close-exception-handling proc idb params)) (apply open-run-close-no-exception-handling proc idb params))) ;; (define open-run-close (define open-run-close open-run-close-exception-handling) |
︙ | ︙ | |||
1547 1548 1549 1550 1551 1552 1553 | ;; i.e. these lists define what to NOT show. ;; states and statuses are required to be lists, empty is ok ;; not-in #t = above behaviour, #f = must match (define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals) (if (not (number? run-id)) (begin ;; no need to treat this as an error by default (debug:print 4 "WARNING: call to db:get-tests-for-run with bad run-id=" run-id) | | | 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 | ;; i.e. these lists define what to NOT show. ;; states and statuses are required to be lists, empty is ok ;; not-in #t = above behaviour, #f = must match (define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals) (if (not (number? run-id)) (begin ;; no need to treat this as an error by default (debug:print 4 "WARNING: call to db:get-tests-for-run with bad run-id=" run-id) ;; (print-call-chain (current-error-port)) '()) (let* ((qryvalstr (case qryvals ((shortlist) "id,run_id,testname,item_path,state,status") ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") (else qryvals))) (res '()) ;; if states or statuses are null then assume match all when not-in is false |
︙ | ︙ | |||
2671 2672 2673 2674 2675 2676 2677 | exn (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) (if (eq? err-status 'done) default (begin (debug:print 0 "ERROR: query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) | | | 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 | exn (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) (if (eq? err-status 'done) default (begin (debug:print 0 "ERROR: query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) ;;====================================================================== ;; Extract ods file from the db ;;====================================================================== |
︙ | ︙ |
Modified lock-queue.scm from [141b4e4668] to [fb7e24faf1].
︙ | ︙ | |||
187 188 189 190 191 192 193 | (mystart (current-seconds)) (db (lock-queue:db-dat-get-db dbdat))) (handle-exceptions exn (begin (debug:print 0 "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) | | | | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | (mystart (current-seconds)) (db (lock-queue:db-dat-get-db dbdat))) (handle-exceptions exn (begin (debug:print 0 "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) (thread-sleep! 10) (if (> count 0) (begin (sqlite3:finalize! db) (lock-queue:wait-turn fname test-id count: (- count 1))) (begin (debug:print 0 "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain") (print-call-chain (current-error-port)) #f))) (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") (sqlite3:execute db "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" test-id mystart) (thread-sleep! 1) ;; give other tests a chance to register |
︙ | ︙ |
Modified mt.scm from [fdb95af183] to [15956fcc00].
︙ | ︙ | |||
166 167 168 169 170 171 172 | ;;====================================================================== ;; speed up for common cases with a little logic (define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) (if (not (and run-id test-id)) (begin (debug:print 0 "ERROR: bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) | | | 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | ;;====================================================================== ;; speed up for common cases with a little logic (define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) (if (not (and run-id test-id)) (begin (debug:print 0 "ERROR: bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) (print-call-chain (current-error-port)) #f) (begin (cond ((and newstate newstatus newcomment) (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) ((and newstate newstatus) (rmt:general-call 'state-status run-id newstate newstatus test-id)) |
︙ | ︙ |
Modified portlogger.scm from [05c38f2bc0] to [c19f5a6299].
︙ | ︙ | |||
56 57 58 59 60 61 62 | exn (begin ;; (release-dot-lock fname) (debug:print 0 "ERROR: portlogger:open-run-close failed. " proc " " params) (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it | | | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | exn (begin ;; (release-dot-lock fname) (debug:print 0 "ERROR: portlogger:open-run-close failed. " proc " " params) (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) (db (portlogger:open-db fname)) (res (apply proc db params))) (sqlite3:finalize! db) ;; (release-dot-lock fname) res)))) |
︙ | ︙ | |||
102 103 104 105 106 107 108 | (define (portlogger:get-prev-used-port db) (handle-exceptions exn (begin (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) | | | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | (define (portlogger:get-prev-used-port db) (handle-exceptions exn (begin (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) (print-call-chain (current-error-port)) (debug:print 0 "Continuing anyway.") #f) (sqlite3:fold-row (lambda (var curr) (or curr var curr)) #f db |
︙ | ︙ | |||
127 128 129 130 131 132 133 | (random (- 64000 lowport)))))) (handle-exceptions exn (begin (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) | | | 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | (random (- 64000 lowport)))))) (handle-exceptions exn (begin (debug:print 0 "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) (print-call-chain (current-error-port)) (debug:print 0 "Continuing anyway.")) (portlogger:take-port db portnum)) portnum)) ;; set port to "released", "failed" etc. ;; (define (portlogger:set-port db portnum value) |
︙ | ︙ | |||
158 159 160 161 162 163 164 | (handle-exceptions exn (begin (debug:print 0 "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) | | | 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | (handle-exceptions exn (begin (debug:print 0 "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port))) (cond ((> numargs 1) ;; most commands (case (string->symbol (car args)) ;; commands with two or more params ((take)(portlogger:take-port db (string->number (cadr args)))) ((set) (portlogger:set-port db (string->number (cadr args)) (caddr args)) (caddr args)) ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed))))))) (sqlite3:finalize! db) result)) ;; (print (apply portlogger:main (cdr (argv)))) |
Modified rmt.scm from [7a184ad9b2] to [045a714be1].
︙ | ︙ | |||
95 96 97 98 99 100 101 | (db:string->obj res) ;; (if (< attemptnum 100) ;; (begin ;; (hash-table-delete! *runremote* run-id) ;; (thread-sleep! 0.5) ;; (rmt:send-receive cmd rid params attempnum: (+ attemptnum 1))) ;; (begin | | | 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | (db:string->obj res) ;; (if (< attemptnum 100) ;; (begin ;; (hash-table-delete! *runremote* run-id) ;; (thread-sleep! 0.5) ;; (rmt:send-receive cmd rid params attempnum: (+ attemptnum 1))) ;; (begin ;; (print-call-chain (current-error-port)) ;; (debug:print 0 "ERROR: too many attempts to communicate have failed. Giving up. Kill your mtest processes and start over") ;; (exit 1))))) (begin ;; let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection ;; no longer killing the server in http-transport:client-api-send-receive |
︙ | ︙ | |||
298 299 300 301 302 303 304 | (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) (define (rmt:get-test-info-by-id run-id test-id) (if (and (number? run-id)(number? test-id)) (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) (begin (debug:print 0 "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) | | | 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 | (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) (define (rmt:get-test-info-by-id run-id test-id) (if (and (number? run-id)(number? test-id)) (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) (begin (debug:print 0 "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) (print-call-chain (current-error-port)) #f))) (define (rmt:test-get-rundir-from-test-id run-id test-id) (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) (let* ((test-path (if (string? work-area) |
︙ | ︙ | |||
323 324 325 326 327 328 329 | (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals) (if (number? run-id) (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals)) (begin (debug:print "ERROR: rmt:get-tests-for-run called with bad run-id=" run-id) | | | 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 | (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals) (if (number? run-id) (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals)) (begin (debug:print "ERROR: rmt:get-tests-for-run called with bad run-id=" run-id) (print-call-chain (current-error-port)) '()))) (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) (let ((run-id-list (if run-ids run-ids (rmt:get-all-run-ids)))) (apply append (map (lambda (run-id) |
︙ | ︙ |
Modified runs.scm from [692dff51df] to [94d1fbb17e].
︙ | ︙ | |||
668 669 670 671 672 673 674 | reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) | | < | < < < < < < | | < < | < < < | 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 | reruns) #f)) ;; Register tests ;; ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs (rmt:general-call 'register-test run-id run-id test-name item-path) (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) (rmt:general-call 'register-test run-id run-id test-name "")) (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) (if (and (null? tal)(null? reg)) (list hed tal (append reg (list hed)) reruns) (list (runs:queue-next-hed tal reg reglen regfull) (runs:queue-next-tal tal reg reglen regfull) ;; NB// Here we are building reg as we register tests ;; if regfull we must pop the front item off reg |
︙ | ︙ |
Modified tasks.scm from [6b278cc270] to [f329b3e32b].
︙ | ︙ | |||
21 22 23 24 25 26 27 | ;;====================================================================== ;; Tasks db ;;====================================================================== ;; wait up to aprox n seconds for a journal to go away ;; (define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) | > > | | | | | | | | | | | | | | | | | | | | < < | | | > > | | | | > | > | | | 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | ;;====================================================================== ;; Tasks db ;;====================================================================== ;; wait up to aprox n seconds for a journal to go away ;; (define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) (if (not (string? path)) (debug:print 0 "ERROR: Called tasks:wait-on-journal with path=" path " (not a string)") (let ((fullpath (conc path "-journal"))) (handle-exceptions exn #t ;; if stuff goes wrong just allow it to move on (let loop ((journal-exists (file-exists? fullpath)) (count n)) ;; wait ten times ... (if journal-exists (begin (if (and waiting-msg (eq? (modulo n 30) 0)) (debug:print 0 waiting-msg)) (if (> count 0) (begin (thread-sleep! 1) (loop (file-exists? fullpath) (- count 1))) (begin (if remove (system (conc "rm -rf " fullpath))) #f))) #t)))))) (define (tasks:get-task-db-path) (let* ((linktree (configf:lookup *configdat* "setup" "linktree")) (dbpath (conc linktree "/.db"))) dbpath)) ;; If file exists AND ;; file readable ;; ==> open it ;; If file exists AND ;; file NOT readable ;; ==> open in-mem version ;; If file NOT exists ;; ==> open in-mem version ;; (define (tasks:open-db #!key (numretries 4)) (if *task-db* *task-db* (handle-exceptions exn (if (> numretries 0) (begin (print-call-chain (current-error-port)) (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " exn=" (condition->list exn)) (thread-sleep! 1) (tasks:open-db numretries (- numretries 1))) (begin (print-call-chain (current-error-port)) (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " exn=" (condition->list exn)))) (let* ((dbpath (tasks:get-task-db-path)) (dbfile (conc dbpath "/monitor.db")) (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away (exists (file-exists? dbpath)) (write-access (file-write-access? dbpath)) (mdb (cond ;; what the hek is *toppath* doing here? ((and (string? *toppath*)(file-write-access? *toppath*)) (sqlite3:open-database dbfile)) ((file-read-access? dbpath) (sqlite3:open-database dbfile)) (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (if (and exists (not write-access)) (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control (sqlite3:set-busy-handler! mdb handler) (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) |
︙ | ︙ | |||
306 307 308 309 310 311 312 | (best #f)) (handle-exceptions exn (begin (debug:print 0 "WARNING: tasks:get-server db access error.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " for run " run-id) | | | 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 | (best #f)) (handle-exceptions exn (begin (debug:print 0 "WARNING: tasks:get-server db access error.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " for run " run-id) (print-call-chain (current-error-port)) (if (> retries 0) (begin (debug:print 0 " trying call to tasks:get-server again in 10 seconds") (thread-sleep! 10) (tasks:get-server mdb run-id retries: (- retries 0))) (debug:print 0 "10 tries of tasks:get-server all crashed and burned. Giving up and returning \"no server found\""))) (sqlite3:for-each-row |
︙ | ︙ |
Modified tests.scm from [9299be9897] to [3b87b51253].
︙ | ︙ | |||
658 659 660 661 662 663 664 | ;; (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) ;; (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up") ;; (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") ;; (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (print "exn=" (condition->list exn)) ;; (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) | | | 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 | ;; (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) ;; (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up") ;; (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") ;; (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (print "exn=" (condition->list exn)) ;; (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) ;; (print-call-chain (current-error-port)))) ;; (let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb)) ;; (cpuload (get-cpu-load)) ;; (diskfree (get-df (current-directory))) ;; (uname (get-uname "-srvpio")) ;; (hostname (get-host-name))) ;; ;(tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) ;; (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) |
︙ | ︙ | |||
686 687 688 689 690 691 692 | (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up") (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) | | | 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 | (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up") (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port)))) (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) ))) ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== (define (test:archive db test-id) #f) (define (test:archive-tests db keynames target) #f) |