Overview
Comment: | possible fixes to test |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | v1.80-possible-fixes |
Files: | files | file ages | folders |
SHA1: |
4ef17eb32cebc2b536515dc638d375d8 |
User & Date: | matt on 2023-04-17 16:51:54 |
Other Links: | branch diff | manifest | tags |
Context
2023-04-17
| ||
16:51 | possible fixes to test Leaf check-in: 4ef17eb32c user: matt tags: v1.80-possible-fixes | |
2023-04-16
| ||
16:43 | possible fix for bind issue check-in: 66f2b72697 user: matt tags: v1.80 | |
Changes
Modified api.scm from [9008afe383] to [a866b2c510].
︙ | ︙ | |||
251 252 253 254 255 256 257 | (else (if (not ok)(debug:print 0 *default-log-port* "ERROR: "cmd", run-id "run-id", not correct for dbfname "(dbr:dbstruct-dbfname dbstruct))) (assert ok "FATAL: database file and run-id not aligned."))))) (ttdat *server-info*) (server-state (tt-state ttdat)) (status (cond ;; ((> newcount 600) 'busy) | | | 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 | (else (if (not ok)(debug:print 0 *default-log-port* "ERROR: "cmd", run-id "run-id", not correct for dbfname "(dbr:dbstruct-dbfname dbstruct))) (assert ok "FATAL: database file and run-id not aligned."))))) (ttdat *server-info*) (server-state (tt-state ttdat)) (status (cond ;; ((> newcount 600) 'busy) ((> newcount 3) 'loaded) ;; this gets transmitted to the client which calls tt:backoff-incr to slow stuff down. (else 'ok))) (errmsg (case status ((busy) (conc "Server overloaded, "newcount" threads in flight")) ((loaded) (conc "Server loaded, "newcount" threads in flight")) (else #f))) (result (case status ((busy) (- newcount 29)) ;; call back in as many seconds |
︙ | ︙ |
Modified db.scm from [3353e0f2ec] to [baa32a3826].
︙ | ︙ | |||
2594 2595 2596 2597 2598 2599 2600 | ;; Get test data using test_id ;; (define (db:get-test-info-by-id dbstruct run-id test-id) (db:with-db dbstruct run-id | | | 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 | ;; Get test data using test_id ;; (define (db:get-test-info-by-id dbstruct run-id test-id) (db:with-db dbstruct run-id #t ;; treat as high load and run under mutex (lambda (dbdat db) (let ((res #f)) (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) db |
︙ | ︙ |
Modified tcp-transportmod.scm from [9c6068b733] to [9520aa669a].
︙ | ︙ | |||
248 249 250 251 252 253 254 | (if (not res) ;; tt:handler is telling us that communication failed (let* ((host (tt-conn-host conn)) (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) (servinf (tt-conn-servinf-file conn))) ;;(servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath) | | > | 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | (if (not res) ;; tt:handler is telling us that communication failed (let* ((host (tt-conn-host conn)) (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) (servinf (tt-conn-servinf-file conn))) ;;(servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath) (hash-table-delete! (tt-conns ttdat) dbfname) (if (and servinf (file-exists? servinf)) (begin (if (< attemptnum 3) (begin (thread-sleep! 0.25) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (begin (debug:print 0 *default-log-port* "INFO: no response from server "host":"port" for "dbfname) (hash-table-delete! (tt-conns ttdat) dbfname) (if (and (file-exists? servinf) (> (- (current-seconds)(file-modification-time servinf)) 60)) (begin (debug:print 0 *default-log-port* "INFO: "servinf" file seems old and no ping response, removing it.") (handle-exceptions exn #f |
︙ | ︙ | |||
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 | ;; the server, look at (tt:start-server ...) above for the entry point ;; for the entire server system ;; (define (tt:start-tcp-server ttdat) (setup-listener-portlogger ttdat) ;; set up tcp-listener (let* ((socket (tt-socket ttdat)) (handler (tt-handler ttdat)) ;; the handler comes from our client setting a handler function (handler-proc (lambda () (let* ((indat (deserialize)) (result #f) (exn-result #f) (stdout-result (with-output-to-string (lambda () (let ((res (handle-exceptions exn (let* ((errdat (condition->list exn))) (set! exn-result errdat) (debug:print 0 *default-log-port* "ERROR: handler exception, these are bad, will exit in five seconds.") (pp errdat *default-log-port*) ;; these are always bad, set up an exit thread | > | | > | > > > < < < < < < < < < < < < < < < < < | 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 | ;; the server, look at (tt:start-server ...) above for the entry point ;; for the entire server system ;; (define (tt:start-tcp-server ttdat) (setup-listener-portlogger ttdat) ;; set up tcp-listener (let* ((socket (tt-socket ttdat)) (handler (tt-handler ttdat)) ;; the handler comes from our client setting a handler function (fatal-err #f) ;; (handler-proc (lambda () (let* ((indat (deserialize)) (result #f) (exn-result #f) (stdout-result (with-output-to-string (lambda () (let ((res (handle-exceptions exn (let* ((errdat (condition->list exn))) (set! exn-result errdat) (debug:print 0 *default-log-port* "ERROR: handler exception, these are bad, will exit in five seconds.") (pp errdat *default-log-port*) ;; these are always bad, set up an exit thread #;(thread-start! (make-thread (lambda () (thread-sleep! 5) (exit)))) (set! fatal-err #t) #f) (handler indat) ;; this is the proc being called by the remote client ))) (set! result res))))) (full-result (list result exn-result (if (equal? stdout-result "") #f stdout-result)))) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "Serialization failure. full-result="full-result) ;; (serialize '(#f #f #f)) ;; doesn't work - the first call to serialize caused failure ) (serialize full-result)) (if fatal-err (exit)) ;;(assert fatal-err "FATAL: exception in handler.") )))) ((make-tcp-server socket handler-proc) #f ;; yes, send error messages to std-err ))) (define (setup-listener-portlogger uconn) (let ((port (portlogger:open-run-close portlogger:find-port))) (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) (handle-exceptions exn (if (< port 65535) |
︙ | ︙ |