Overview
Comment: | fixed start up wedging |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.80 |
Files: | files | file ages | folders |
SHA1: | ce4cc8997a57e8e9962dc5105982b58c |
User & Date: | matt on 2023-04-10 07:58:47 |
Other Links: | branch diff | manifest | tags |
Context
2023-04-10
| ||
11:58 | Merged fork check-in: 962cf22780 user: mrwellan tags: v1.80 | |
07:58 | fixed start up wedging check-in: ce4cc8997a user: matt tags: v1.80 | |
06:09 | load control is working but servers are still getting wedged over time check-in: 9771b5d5a9 user: matt tags: v1.80 | |
Changes
Modified tcp-transportmod.scm from [f7ad6026cc] to [f46139fb7e].
︙ | ︙ | |||
244 245 246 247 248 249 250 | (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) (hash-table-set! (tt-conns ttdat) dbfname #f) (if (file-exists? servinf) (begin | | | | > > > > > > | 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) (hash-table-set! (tt-conns ttdat) dbfname #f) (if (file-exists? servinf) (begin (if (< attemptnum 3) (begin (thread-sleep! 0.25) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (begin (debug:print 0 *default-log-port* "INFO: no response from server "host":"port" for "dbfname) (if (and (file-exists? servinf) (> (- (current-seconds)(file-modification-time servinf)) 60)) (begin (debug:print 0 *default-log-port* "INFO: "servinf" file seems old and no ping response, removing it.") (handle-exceptions exn #f (delete-file* servinf)) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (begin ;; start server - addressed in client-connect-to-server ;; delay - addressed in client-connect-to-server ;; try again (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf) (thread-sleep! 1) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)))) (begin ;; this case is where res is malformed. 
Probably should abort (assert #f "FATAL: tt:handler received bad data "res) ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.") |
︙ | ︙ | |||
399 400 401 402 403 404 405 | (exn (io-error) (full-err-print exn "ERROR: i/o error") (tt:backoff-incr host port) #f) (exn (i/o net) (if ping-mode #f | > > > > | | | | | | | > | 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | (exn (io-error) (full-err-print exn "ERROR: i/o error") (tt:backoff-incr host port) #f) (exn (i/o net) (if ping-mode #f (cond ((> tries-remaining 4) ;; server likely defunct (tt:backoff-incr host port) #f) ((>= tries-remaining 0) (let* ((backoff-delay (* (- 26 tries-remaining) 0.1))) (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.") (thread-sleep! backoff-delay) (tt:backoff-incr host port) (retry)) (assert #f "FATAL: Too many retries in tt:send-receive-direct")) (else #f)))) (exn () (full-err-print exn "Unhandled exception from client side.") #f)))) ;;====================================================================== ;; server |
︙ | ︙ | |||
648 649 650 651 652 653 654 | (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f #f logf))) (let ((fdat (handle-exceptions exn (begin ;; WARNING: this is potentially dangerous to blanket ignore the errors | | | 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 | (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f #f logf))) (let ((fdat (handle-exceptions exn (begin ;; WARNING: this is potentially dangerous to blanket ignore the errors (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn="(condition->list exn)) '()) ;; no idea what went wrong, call it a bad server, return empty list (with-input-from-file logf read-lines)))) (if (null? fdat) ;; bad data, return bad-dat bad-dat (let loop ((inl (car fdat)) (tail (cdr fdat)) (lnum 0)) |
︙ | ︙ | |||
742 743 744 745 746 747 748 | (let* ((indat (deserialize)) (result #f) (exn-result #f) (stdout-result (with-output-to-string (lambda () (let ((res (handle-exceptions exn | | | > > > > > > | 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 | (let* ((indat (deserialize)) (result #f) (exn-result #f) (stdout-result (with-output-to-string (lambda () (let ((res (handle-exceptions exn (let* ((errdat (condition->list exn))) (set! exn-result errdat) (debug:print 0 *default-log-port* "ERROR: handler exception, these are bad, will exit in five seconds.") (pp errdat *default-log-port*) ;; these are always bad, set up an exit thread (thread-start! (make-thread (lambda () (thread-sleep! 5) (exit)))) #f) (handler indat)))) (set! result res))))) (full-result (list result exn-result (if (equal? stdout-result "") #f stdout-result)))) (handle-exceptions exn (begin |
︙ | ︙ |