fix: NullPool for Celery workers + skip pgvector on first message
- Celery workers use NullPool to avoid "Future attached to a different loop" errors caused by stale pooled async connections being reused across asyncio.run() calls. FastAPI keeps the regular pool (single event loop, so connections are safe to reuse).
- Skip the pgvector similarity search when no conversation history exists (first message) — saves ~3s of embedding + query overhead.
- Wrap pgvector retrieval in try/except to prevent DB errors from blocking the LLM response.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -493,21 +493,26 @@ async def _process_message(
|
||||
)
|
||||
|
||||
# 2. Long-term: pgvector similarity search
|
||||
# Skip if no conversation history exists yet (first message optimization —
|
||||
# embedding + pgvector query adds ~3s before the first token appears)
|
||||
relevant_context: list[str] = []
|
||||
if user_text:
|
||||
query_embedding = embed_text(user_text)
|
||||
rls_token = current_tenant_id.set(tenant_uuid)
|
||||
if user_text and recent_messages:
|
||||
try:
|
||||
async with async_session_factory() as session:
|
||||
relevant_context = await retrieve_relevant(
|
||||
session,
|
||||
tenant_uuid,
|
||||
agent.id,
|
||||
user_id,
|
||||
query_embedding,
|
||||
)
|
||||
finally:
|
||||
current_tenant_id.reset(rls_token)
|
||||
query_embedding = embed_text(user_text)
|
||||
rls_token = current_tenant_id.set(tenant_uuid)
|
||||
try:
|
||||
async with async_session_factory() as session:
|
||||
relevant_context = await retrieve_relevant(
|
||||
session,
|
||||
tenant_uuid,
|
||||
agent.id,
|
||||
user_id,
|
||||
query_embedding,
|
||||
)
|
||||
finally:
|
||||
current_tenant_id.reset(rls_token)
|
||||
except Exception:
|
||||
logger.warning("pgvector retrieval failed — continuing without long-term memory")
|
||||
finally:
|
||||
await redis_client2.aclose()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user