fix: streaming timeout + WebSocket close guard

- Streaming httpx client now uses a dedicated 300s read timeout (cloud LLMs
  can take 30-60s to return the first token). It previously shared the 120s
  general timeout.
- Guard all WebSocket sends with try/except for client disconnect.
  Prevents "Cannot send once close message has been sent" crash.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-25 18:39:32 -06:00
parent 6c1086046f
commit 17f6d7cb4b
2 changed files with 19 additions and 13 deletions

View File

@@ -52,6 +52,8 @@ _FALLBACK_RESPONSE = (
# Timeout for LLM pool HTTP requests — generous to allow slow local inference
_LLM_TIMEOUT = httpx.Timeout(timeout=120.0, connect=10.0)
+# Streaming needs a longer read timeout — first token can take 30-60s with cloud models
+_LLM_STREAM_TIMEOUT = httpx.Timeout(timeout=300.0, connect=10.0, read=300.0)
# Maximum number of tool-call iterations before breaking the loop
_MAX_TOOL_ITERATIONS = 5
@@ -321,7 +323,7 @@ async def run_agent_streaming(
}
try:
-async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
+async with httpx.AsyncClient(timeout=_LLM_STREAM_TIMEOUT) as client:
async with client.stream("POST", llm_stream_url, json=payload) as response:
if response.status_code != 200:
logger.error(