Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -2246,11 +2246,13 @@
 	     (z3:decode-buffer
 	      (base64:base64-decode
 	       (string-substitute 
 		(regexp "_") "=" msg #t)))
 	   (lambda ()(deserialize)))
-	 (vector #f #f #f))) ;; crude reply for when things go awry
+	 (begin
+	   (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.")
+	   #f))) ;; crude reply for when things go awry
     ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
     (else msg)))
 
 (define (db:test-set-status-state dbstruct run-id test-id status state msg)
   (let ((dbdat  (db:get-db dbstruct run-id)))

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -89,11 +89,14 @@
 	 (jparams         (db:obj->string params)))
     (if connection-info
 	(let ((res             (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
 	  (http-transport:server-dat-update-last-access connection-info)
 	  (if res
-	      (db:string->obj res)
+	      (or (db:string->obj res) ;; a #f result here falls through to the retry below
+		  (begin
+		    (thread-sleep! 0.5) ;; brief pause before asking again
+		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))) ;; keyword spelled attemptnum: so the retry counter actually advances
 	      (begin ;; let ((new-connection-info (client:setup run-id)))
 		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
 		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
 
 		;; no longer killing the server in http-transport:client-api-send-receive

Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -73,10 +73,11 @@
 	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
 	     (print "exn=" (condition->list exn))
 	     (thread-sleep! 1)
 	     (tasks:open-db numretries (- numretries 1)))
 	   (begin
+	     (print-call-chain)
 	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
 	     (print "exn=" (condition->list exn))))
        (let* ((dbpath       (tasks:get-task-db-path))
 	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
 	      (exists       (file-exists? dbpath))