feat(02-01): add two-layer memory system — Redis sliding window + pgvector long-term
- ConversationEmbedding ORM model with Vector(384) column (pgvector) - memory_short_key, escalation_status_key, pending_tool_confirm_key in redis_keys.py - orchestrator/memory/short_term.py: RPUSH/LTRIM sliding window (get_recent_messages, append_message) - orchestrator/memory/long_term.py: pgvector HNSW cosine search (retrieve_relevant, store_embedding) - Migration 002: conversation_embeddings table, HNSW index, RLS with FORCE, SELECT/INSERT only - 10 unit tests (fakeredis), 6 integration tests (pgvector) — all passing - Auto-fix [Rule 3]: postgres image updated to pgvector/pgvector:pg16 (extension required)
This commit is contained in:
259
tests/integration/test_memory_long_term.py
Normal file
259
tests/integration/test_memory_long_term.py
Normal file
@@ -0,0 +1,259 @@
|
||||
"""
|
||||
Integration tests for pgvector long-term memory.
|
||||
|
||||
Requires a live PostgreSQL instance with pgvector extension installed.
|
||||
Tests are automatically skipped if the database is not available
|
||||
(fixture from conftest.py handles that via pytest.skip).
|
||||
|
||||
Key scenarios tested:
|
||||
- store_embedding inserts with correct scoping
|
||||
- retrieve_relevant returns matching content above threshold
|
||||
- Cross-tenant isolation: tenant A's embeddings never returned for tenant B
|
||||
- High threshold returns empty list for dissimilar queries
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from orchestrator.memory.long_term import retrieve_relevant, store_embedding
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def agent_a_id() -> uuid.UUID:
    """Provide the fixed agent UUID used by the tenant A tests.

    The value is a hard-coded literal, so it is the same on every run.
    """
    # ``hex`` is the first parameter of uuid.UUID, so this is the same call
    # as passing the string positionally.
    return uuid.UUID(hex="aaaaaaaa-0000-0000-0000-000000000001")
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def agent_b_id() -> uuid.UUID:
    """Provide the fixed agent UUID used by the tenant B tests.

    The value is a hard-coded literal, so it is the same on every run.
    """
    # ``hex`` is the first parameter of uuid.UUID, so this is the same call
    # as passing the string positionally.
    return uuid.UUID(hex="bbbbbbbb-0000-0000-0000-000000000002")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_store_embedding_inserts_row(
    db_session: AsyncSession,
    tenant_a: dict,
    agent_a_id: uuid.UUID,
):
    """Check that store_embedding persists exactly one readable row.

    A single embedding is stored for tenant A and committed; the table is then
    read back with raw SQL and the stored ``content`` and ``role`` are compared
    with what was written.
    """
    from shared.rls import current_tenant_id

    owner_tenant = tenant_a["id"]
    author = "user-store-test"
    message = "I prefer concise answers."
    vector = [0.1] * 384
    lookup = text(
        "SELECT content, role FROM conversation_embeddings WHERE tenant_id = :tid AND user_id = :uid"
    )

    # current_tenant_id holds the tenant for the DB calls and is always
    # restored, even if one of them raises.
    ctx_token = current_tenant_id.set(owner_tenant)
    try:
        await store_embedding(db_session, owner_tenant, agent_a_id, author, message, "user", vector)
        await db_session.commit()

        fetched = (
            await db_session.execute(lookup, {"tid": str(owner_tenant), "uid": author})
        ).fetchall()
    finally:
        current_tenant_id.reset(ctx_token)

    assert len(fetched) == 1
    stored = fetched[0]
    assert stored.content == message
    assert stored.role == "user"
|
||||
|
||||
|
||||
async def test_retrieve_relevant_returns_similar_content(
    db_session: AsyncSession,
    tenant_a: dict,
    agent_a_id: uuid.UUID,
):
    """retrieve_relevant returns matching content and drops dissimilar content.

    Two rows are stored for the same tenant/agent/user: one whose embedding is
    identical to the query vector and one whose embedding is orthogonal to it.
    With ``threshold=0.5`` the identical row must come back and the orthogonal
    row must not.
    """
    from shared.rls import current_tenant_id

    tenant_id = tenant_a["id"]
    user_id = "user-retrieve-test"

    # Unit vectors on different axes: the query coincides with
    # ``similar_embedding`` and is orthogonal to ``dissimilar_embedding``.
    similar_embedding = [1.0] + [0.0] * 383
    dissimilar_embedding = [0.0] * 383 + [1.0]
    query_embedding = [1.0] + [0.0] * 383  # identical to similar_embedding

    # Scope the DB calls to tenant A and always restore the previous value.
    token = current_tenant_id.set(tenant_id)
    try:
        await store_embedding(
            db_session, tenant_id, agent_a_id, user_id,
            "The user likes Python programming.", "user", similar_embedding
        )
        await store_embedding(
            db_session, tenant_id, agent_a_id, user_id,
            "This is completely unrelated content.", "user", dissimilar_embedding
        )
        await db_session.commit()

        results = await retrieve_relevant(
            db_session, tenant_id, agent_a_id, user_id, query_embedding, top_k=3, threshold=0.5
        )
    finally:
        current_tenant_id.reset(token)

    # The identical-embedding row must be returned...
    assert len(results) >= 1
    assert any("Python" in r for r in results)
    # ...and the orthogonal row must be filtered out by the threshold.
    # Without this check the threshold filter is never actually exercised.
    assert not any("unrelated" in r for r in results), (
        "Dissimilar content was returned despite threshold=0.5"
    )
|
||||
|
||||
|
||||
async def test_retrieve_relevant_high_threshold_returns_empty(
    db_session: AsyncSession,
    tenant_a: dict,
    agent_a_id: uuid.UUID,
):
    """A dissimilar query at threshold=0.99 must yield an empty list.

    One row is stored with a unit vector on axis 0, then the same
    tenant/agent/user is queried with a unit vector on axis 1.
    """
    from shared.rls import current_tenant_id

    owner_tenant = tenant_a["id"]
    author = "user-threshold-test"

    # Stored vector: 1.0 at index 0, zeros elsewhere.
    stored_vec = [1.0 if i == 0 else 0.0 for i in range(384)]
    # Query vector: 1.0 at index 1, zeros elsewhere, so it is orthogonal to
    # the stored vector.
    query_vec = [1.0 if i == 1 else 0.0 for i in range(384)]

    ctx_token = current_tenant_id.set(owner_tenant)
    try:
        await store_embedding(
            db_session, owner_tenant, agent_a_id, author,
            "Some stored content.", "user", stored_vec,
        )
        await db_session.commit()

        matches = await retrieve_relevant(
            db_session, owner_tenant, agent_a_id, author, query_vec, top_k=3, threshold=0.99
        )
    finally:
        # Restore the previous tenant context whether or not the calls succeeded.
        current_tenant_id.reset(ctx_token)

    assert matches == []
|
||||
|
||||
|
||||
async def test_cross_tenant_isolation(
    db_session: AsyncSession,
    tenant_a: dict,
    tenant_b: dict,
    agent_a_id: uuid.UUID,
    agent_b_id: uuid.UUID,
):
    """
    retrieve_relevant with tenant_id=A NEVER returns tenant_id=B embeddings.

    This is the critical security test — cross-tenant contamination would be
    a catastrophic data leak in a multi-tenant system.

    The test first confirms that tenant B can read back the row it stored
    (positive control), so the "tenant A sees nothing from B" check cannot
    pass merely because the row never existed.
    """
    from shared.rls import current_tenant_id

    user_id = "shared-user-id"
    tenant_a_id = tenant_a["id"]
    tenant_b_id = tenant_b["id"]

    # Same query embedding for both tenants
    embedding = [1.0] + [0.0] * 383

    # Store embedding for tenant B and verify B itself can retrieve it
    token = current_tenant_id.set(tenant_b_id)
    try:
        await store_embedding(
            db_session, tenant_b_id, agent_b_id, user_id,
            "Tenant B secret information.", "user", embedding
        )
        await db_session.commit()

        tenant_b_results = await retrieve_relevant(
            db_session, tenant_b_id, agent_b_id, user_id, embedding, top_k=10, threshold=0.0
        )
        # End the read transaction so the tenant A query below starts on a
        # fresh one, as in the original flow — in case tenant scoping is bound
        # per transaction (not visible from this file).
        await db_session.commit()
    finally:
        current_tenant_id.reset(token)

    assert any("Tenant B" in r for r in tenant_b_results), (
        "Positive control failed: tenant B cannot see its own embedding, "
        "so the isolation check below would be vacuous"
    )

    # Query as tenant A — should NOT see tenant B's data
    token = current_tenant_id.set(tenant_a_id)
    try:
        results = await retrieve_relevant(
            db_session, tenant_a_id, agent_a_id, user_id, embedding, top_k=10, threshold=0.0
        )
    finally:
        current_tenant_id.reset(token)

    # Tenant A MUST NOT see tenant B's embeddings
    for result in results:
        assert "Tenant B" not in result, "Cross-tenant data leakage detected!"
|
||||
|
||||
|
||||
async def test_retrieve_relevant_user_isolation(
    db_session: AsyncSession,
    tenant_a: dict,
    agent_a_id: uuid.UUID,
):
    """retrieve_relevant for user B never returns user A embeddings.

    A row is stored for "user-A", then the same tenant and agent are queried
    as "user-B" with an identical embedding and a permissive threshold. A
    second query as "user-A" acts as a positive control, so the isolation
    check cannot pass merely because nothing was stored.
    """
    from shared.rls import current_tenant_id

    tenant_id = tenant_a["id"]
    embedding = [1.0] + [0.0] * 383

    token = current_tenant_id.set(tenant_id)
    try:
        await store_embedding(
            db_session, tenant_id, agent_a_id, "user-A",
            "User A private information.", "user", embedding
        )
        await db_session.commit()

        # Query as user B — should not see user A's data
        results = await retrieve_relevant(
            db_session, tenant_id, agent_a_id, "user-B", embedding, top_k=10, threshold=0.0
        )

        # Positive control: the owner must be able to see the stored row
        owner_results = await retrieve_relevant(
            db_session, tenant_id, agent_a_id, "user-A", embedding, top_k=10, threshold=0.0
        )
    finally:
        current_tenant_id.reset(token)

    assert any("User A private" in r for r in owner_results), (
        "Positive control failed: user-A cannot see its own embedding, "
        "so the isolation check below would be vacuous"
    )

    for result in results:
        assert "User A private" not in result
|
||||
|
||||
|
||||
async def test_retrieve_relevant_top_k_limits_results(
    db_session: AsyncSession,
    tenant_a: dict,
    agent_a_id: uuid.UUID,
):
    """retrieve_relevant respects top_k limit.

    Five rows with an embedding identical to the query are stored for one
    tenant/agent/user. With ``top_k=2`` and ``threshold=0.0`` exactly two of
    them are expected back.
    """
    from shared.rls import current_tenant_id

    tenant_id = tenant_a["id"]
    user_id = "user-topk-test"
    embedding = [1.0] + [0.0] * 383

    token = current_tenant_id.set(tenant_id)
    try:
        # Store 5 embeddings identical to the query vector
        for i in range(5):
            await store_embedding(
                db_session, tenant_id, agent_a_id, user_id,
                f"Content item {i}", "user", embedding
            )
        await db_session.commit()

        results = await retrieve_relevant(
            db_session, tenant_id, agent_a_id, user_id, embedding, top_k=2, threshold=0.0
        )
    finally:
        current_tenant_id.reset(token)

    # Exactly two: more candidates exist than top_k, so the cap must bind.
    # An upper bound alone would also pass on an empty result.
    assert len(results) == 2
|
||||
Reference in New Issue
Block a user