feat(02-01): add two-layer memory system — Redis sliding window + pgvector long-term
- ConversationEmbedding ORM model with Vector(384) column (pgvector)
- memory_short_key, escalation_status_key, pending_tool_confirm_key in redis_keys.py
- orchestrator/memory/short_term.py: RPUSH/LTRIM sliding window (get_recent_messages, append_message)
- orchestrator/memory/long_term.py: pgvector HNSW cosine search (retrieve_relevant, store_embedding)
- Migration 002: conversation_embeddings table, HNSW index, RLS with FORCE, SELECT/INSERT only
- 10 unit tests (fakeredis), 6 integration tests (pgvector) — all passing
- Auto-fix [Rule 3]: postgres image updated to pgvector/pgvector:pg16 (extension required)
This commit is contained in:
96
packages/shared/shared/models/memory.py
Normal file
96
packages/shared/shared/models/memory.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
SQLAlchemy 2.0 ORM models for conversational memory.

ConversationEmbedding stores pgvector embeddings of past conversation turns
for long-term semantic retrieval across sessions. This is the persistence layer
for the long-term memory module in the Agent Orchestrator.

IMPORTANT:
  - Embeddings are immutable (no UPDATE) — like audit records. We store and read
    but never modify. This simplifies the data model and prevents mutation bugs.
  - RLS is ENABLED with FORCE — tenant_id isolation is enforced at the DB level.
  - The vector dimension (384) corresponds to all-MiniLM-L6-v2 output size.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from pgvector.sqlalchemy import Vector
|
||||
from sqlalchemy import DateTime, ForeignKey, Text, func
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from shared.models.tenant import Base
|
||||
|
||||
|
||||
class ConversationEmbedding(Base):
    """
    A single embedded conversation turn stored for long-term recall.

    Each row represents one message (user or assistant) converted to a
    384-dimensional embedding via all-MiniLM-L6-v2. The Agent Orchestrator
    queries this table at prompt assembly time to inject relevant past context.

    Scoped by:
      - tenant_id: RLS enforced isolation between tenants
      - agent_id: isolation between agents within a tenant
      - user_id: isolation between end-users of the same agent

    RLS policy enforces:
      tenant_id = current_setting('app.current_tenant', TRUE)::uuid

    FORCE ROW LEVEL SECURITY ensures even the table owner cannot bypass this.

    NOTE(review): the RLS policy is not declared anywhere on this model; it
    must be created by the database migration. Confirm the two stay in sync.
    """

    __tablename__ = "conversation_embeddings"

    # Primary key is generated client-side (Python default), not by the server.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Owning tenant; rows are removed when the tenant row is deleted.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # NOTE(review): no ForeignKey is declared for agent_id — confirm that the
    # absence of a referential constraint is intentional.
    agent_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        nullable=False,
        index=True,
    )
    user_id: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Channel-native user identifier (e.g. Slack user ID U12345)",
    )
    content: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Original message text that was embedded",
    )
    # Free-form text column; the allowed values are documented in the column
    # comment only and are not enforced by this model.
    role: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Message role: 'user' or 'assistant'",
    )
    # Fixed-width vector; the width must match the embedding model's output.
    embedding: Mapped[list[float]] = mapped_column(
        Vector(384),
        nullable=False,
        comment="all-MiniLM-L6-v2 embedding (384 dimensions)",
    )
    # Timestamp is assigned by the database server at insert time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    def __repr__(self) -> str:
        """Return a compact debug representation (omits content and embedding)."""
        return (
            f"<ConversationEmbedding id={self.id} "
            f"tenant_id={self.tenant_id} agent_id={self.agent_id} "
            f"user_id={self.user_id!r} role={self.role!r}>"
        )
|
||||
@@ -86,3 +86,61 @@ def engaged_thread_key(tenant_id: str, thread_id: str) -> str:
|
||||
Namespaced Redis key: "{tenant_id}:engaged:{thread_id}"
|
||||
"""
|
||||
return f"{tenant_id}:engaged:{thread_id}"
|
||||
|
||||
|
||||
def memory_short_key(tenant_id: str, agent_id: str, user_id: str) -> str:
    """
    Redis key for the short-term conversational memory sliding window.

    Stores the last N messages (serialized as JSON) for a specific
    tenant + agent + user combination. Used by the Agent Orchestrator to
    inject recent conversation history into every LLM prompt.

    Key includes all three discriminators to ensure:
      - Two users talking to the same agent have separate histories
      - The same user talking to two different agents has separate histories
      - Two tenants with the same agent/user IDs are fully isolated

    Args:
        tenant_id: Konstruct tenant identifier.
        agent_id: Agent identifier (UUID string).
        user_id: End-user identifier (channel-native, e.g. Slack user ID).

    Returns:
        Namespaced Redis key: "{tenant_id}:memory:short:{agent_id}:{user_id}"
    """
    # tenant_id comes first so every key for a tenant shares one prefix.
    return f"{tenant_id}:memory:short:{agent_id}:{user_id}"
|
||||
|
||||
|
||||
def escalation_status_key(tenant_id: str, thread_id: str) -> str:
    """
    Redis key for tracking escalation status of a thread.

    Stores the current escalation state for a conversation thread —
    whether it has been escalated to a human or another agent.

    Args:
        tenant_id: Konstruct tenant identifier.
        thread_id: Thread identifier.

    Returns:
        Namespaced Redis key: "{tenant_id}:escalation:{thread_id}"
    """
    # tenant_id comes first so every key for a tenant shares one prefix.
    return f"{tenant_id}:escalation:{thread_id}"
|
||||
|
||||
|
||||
def pending_tool_confirm_key(tenant_id: str, thread_id: str) -> str:
    """
    Redis key for tracking pending tool confirmation requests.

    Stores the pending tool invocation that requires explicit user
    confirmation before execution (e.g. destructive operations).

    Args:
        tenant_id: Konstruct tenant identifier.
        thread_id: Thread identifier.

    Returns:
        Namespaced Redis key: "{tenant_id}:tool_confirm:{thread_id}"
    """
    # tenant_id comes first so every key for a tenant shares one prefix.
    return f"{tenant_id}:tool_confirm:{thread_id}"
|
||||
|
||||
Reference in New Issue
Block a user