Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -253,11 +253,11 @@ (assert ok "FATAL: database file and run-id not aligned."))))) (ttdat *server-info*) (server-state (tt-state ttdat)) (status (cond ;; ((> newcount 600) 'busy) - ((> newcount 5) 'loaded) ;; this gets transmitted to the client which calls tt:backoff-incr to slow stuff down. + ((> newcount 3) 'loaded) ;; this gets transmitted to the client which calls tt:backoff-incr to slow stuff down. (else 'ok))) (errmsg (case status ((busy) (conc "Server overloaded, "newcount" threads in flight")) ((loaded) (conc "Server loaded, "newcount" threads in flight")) (else #f))) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2596,11 +2596,11 @@ ;; (define (db:get-test-info-by-id dbstruct run-id test-id) (db:with-db dbstruct run-id - #f + #t ;; treat as high load and run under mutex (lambda (dbdat db) (let ((res #f)) (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -250,19 +250,20 @@ (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) (servinf (tt-conn-servinf-file conn))) ;;(servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath) - (hash-table-set! (tt-conns ttdat) dbfname #f) + (hash-table-delete! (tt-conns ttdat) dbfname) (if (and servinf (file-exists? servinf)) (begin (if (< attemptnum 3) (begin (thread-sleep! 0.25) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (begin (debug:print 0 *default-log-port* "INFO: no response from server "host":"port" for "dbfname) + (hash-table-delete! (tt-conns ttdat) dbfname) (if (and (file-exists? servinf) (> (- (current-seconds)(file-modification-time servinf)) 60)) (begin (debug:print 0 *default-log-port* "INFO: "servinf" file seems old and no ping response, removing it.") (handle-exceptions @@ -762,10 +763,11 @@ ;; (define (tt:start-tcp-server ttdat) (setup-listener-portlogger ttdat) ;; set up tcp-listener (let* ((socket (tt-socket ttdat)) (handler (tt-handler ttdat)) ;; the handler comes from our client setting a handler function + (fatal-err #f) ;; (handler-proc (lambda () (let* ((indat (deserialize)) (result #f) (exn-result #f) (stdout-result (with-output-to-string @@ -775,13 +777,14 @@ (let* ((errdat (condition->list exn))) (set! exn-result errdat) (debug:print 0 *default-log-port* "ERROR: handler exception, these are bad, will exit in five seconds.") (pp errdat *default-log-port*) ;; these are always bad, set up an exit thread - (thread-start! (make-thread (lambda () + #;(thread-start! (make-thread (lambda () (thread-sleep! 5) - (exit)))) + (exit)))) + (set! fatal-err #t) #f) (handler indat) ;; this is the proc being called by the remote client ))) (set! result res))))) (full-result (list result exn-result (if (equal? stdout-result "") #f stdout-result)))) @@ -789,32 +792,18 @@ exn (begin (debug:print 0 *default-log-port* "Serialization failure. full-result="full-result) ;; (serialize '(#f #f #f)) ;; doesn't work - the first call to serialize caused failure ) - (serialize full-result)))))) + (serialize full-result)) + (if fatal-err (exit)) + ;;(assert fatal-err "FATAL: exception in handler.") + )))) ((make-tcp-server socket handler-proc) #f ;; yes, send error messages to std-err ))) -;; create a tcp listener and return a populated udat struct with -;; my port, address, hostname, pid etc. -;; return #f if fail to find a port to allocate. -;; -;; if udata-in is #f create the record -;; if there is already a serv-listener return the udata -;; -;; (define (setup-listener uconn #!optional (port 4242)) -;; (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) -;; (handle-exceptions -;; exn -;; (if (< port 65535) -;; (begin -;; (thread-sleep! 0.25) -;; (setup-listener uconn (+ port 1))) -;; #f) -;; (connect-listener uconn port))) (define (setup-listener-portlogger uconn) (let ((port (portlogger:open-run-close portlogger:find-port))) (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) (handle-exceptions