feat(02-01): add two-layer memory system — Redis sliding window + pgvector long-term

- ConversationEmbedding ORM model with Vector(384) column (pgvector)
- memory_short_key, escalation_status_key, pending_tool_confirm_key in redis_keys.py
- orchestrator/memory/short_term.py: RPUSH/LTRIM sliding window (get_recent_messages, append_message)
- orchestrator/memory/long_term.py: pgvector HNSW cosine search (retrieve_relevant, store_embedding)
- Migration 002: conversation_embeddings table, HNSW index, RLS with FORCE, SELECT/INSERT only
- 10 unit tests (fakeredis), 6 integration tests (pgvector) — all passing
- Auto-fix [Rule 3]: postgres image updated to pgvector/pgvector:pg16 (extension required)
This commit is contained in:
2026-03-23 14:41:57 -06:00
parent 370a860622
commit 28a5ee996e
11 changed files with 998 additions and 1 deletions

View File

@@ -19,6 +19,7 @@ dependencies = [
"httpx>=0.28.0",
"slowapi>=0.1.9",
"bcrypt>=4.0.0",
"pgvector>=0.3.0",
]
[tool.hatch.build.targets.wheel]

View File

@@ -0,0 +1,96 @@
"""
SQLAlchemy 2.0 ORM models for conversational memory.
ConversationEmbedding stores pgvector embeddings of past conversation turns
for long-term semantic retrieval across sessions. This is the persistence layer
for the long-term memory module in the Agent Orchestrator.
IMPORTANT:
- Embeddings are immutable (no UPDATE) — like audit records. We store and read
but never modify. This simplifies the data model and prevents mutation bugs.
- RLS is ENABLED with FORCE — tenant_id isolation is enforced at the DB level.
- The vector dimension (384) corresponds to all-MiniLM-L6-v2 output size.
"""
from __future__ import annotations
import uuid
from datetime import datetime
from pgvector.sqlalchemy import Vector
from sqlalchemy import DateTime, ForeignKey, Text, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from shared.models.tenant import Base
class ConversationEmbedding(Base):
    """
    A single embedded conversation turn stored for long-term recall.

    Each row represents one message (user or assistant) converted to a
    384-dimensional embedding via all-MiniLM-L6-v2. The Agent Orchestrator
    queries this table at prompt assembly time to inject relevant past context.

    Rows are write-once: the application stores and reads embeddings but never
    updates them (see module docstring) — treat instances as immutable.

    Scoped by:
        - tenant_id: RLS enforced isolation between tenants
        - agent_id: isolation between agents within a tenant
        - user_id: isolation between end-users of the same agent

    RLS policy enforces:
        tenant_id = current_setting('app.current_tenant', TRUE)::uuid
    FORCE ROW LEVEL SECURITY ensures even the table owner cannot bypass this.
    """
    __tablename__ = "conversation_embeddings"
    # Surrogate primary key; generated client-side so rows can be referenced
    # before the INSERT round-trips.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Owning tenant; CASCADE delete so purging a tenant removes its memory.
    # Indexed because every query filters by tenant (in addition to RLS).
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # No FK here — agent rows may live in another service's schema;
    # referential integrity for agents is enforced at the application layer.
    agent_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        nullable=False,
        index=True,
    )
    # Text (not UUID): identifiers are channel-native and vary in format.
    user_id: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Channel-native user identifier (e.g. Slack user ID U12345)",
    )
    # Raw text kept alongside the vector so retrieved matches can be injected
    # into prompts without a reverse-embedding lookup.
    content: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Original message text that was embedded",
    )
    role: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Message role: 'user' or 'assistant'",
    )
    # pgvector column; dimension must match the embedding model's output
    # (all-MiniLM-L6-v2 → 384). Searched via the HNSW cosine index created
    # in migration 002.
    embedding: Mapped[list[float]] = mapped_column(
        Vector(384),
        nullable=False,
        comment="all-MiniLM-L6-v2 embedding (384 dimensions)",
    )
    # DB-side timestamp (func.now()) so ordering is consistent across app
    # instances regardless of clock skew.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    def __repr__(self) -> str:
        """Debug representation; includes scoping IDs but never the content/vector."""
        return (
            f"<ConversationEmbedding id={self.id} "
            f"tenant_id={self.tenant_id} agent_id={self.agent_id} "
            f"user_id={self.user_id!r} role={self.role!r}>"
        )

View File

@@ -86,3 +86,61 @@ def engaged_thread_key(tenant_id: str, thread_id: str) -> str:
Namespaced Redis key: "{tenant_id}:engaged:{thread_id}"
"""
return f"{tenant_id}:engaged:{thread_id}"
def memory_short_key(tenant_id: str, agent_id: str, user_id: str) -> str:
    """
    Build the Redis key for the short-term conversational memory window.

    The key addresses a sliding window of the last N messages (JSON-serialized)
    for one tenant + agent + user combination; the Agent Orchestrator reads it
    to inject recent history into every LLM prompt.

    All three discriminators appear in the key so that:
    - two users of the same agent keep separate histories,
    - one user talking to two agents keeps separate histories,
    - tenants with colliding agent/user IDs remain fully isolated.

    Args:
        tenant_id: Konstruct tenant identifier.
        agent_id: Agent identifier (UUID string).
        user_id: End-user identifier (channel-native, e.g. Slack user ID).

    Returns:
        Namespaced Redis key: "{tenant_id}:memory:short:{agent_id}:{user_id}"
    """
    # Assemble the colon-delimited namespace segments explicitly.
    segments = (tenant_id, "memory", "short", agent_id, user_id)
    return ":".join(segments)
def escalation_status_key(tenant_id: str, thread_id: str) -> str:
    """
    Build the Redis key tracking a thread's escalation status.

    The value at this key records whether the conversation thread has been
    escalated to a human or another agent.

    Args:
        tenant_id: Konstruct tenant identifier.
        thread_id: Thread identifier.

    Returns:
        Namespaced Redis key: "{tenant_id}:escalation:{thread_id}"
    """
    # Tenant prefix first, then the fixed namespace, then the thread.
    return ":".join((tenant_id, "escalation", thread_id))
def pending_tool_confirm_key(tenant_id: str, thread_id: str) -> str:
    """
    Build the Redis key for a thread's pending tool-confirmation request.

    The value at this key holds the tool invocation awaiting explicit user
    confirmation before execution (e.g. destructive operations).

    Args:
        tenant_id: Konstruct tenant identifier.
        thread_id: Thread identifier.

    Returns:
        Namespaced Redis key: "{tenant_id}:tool_confirm:{thread_id}"
    """
    # Same tenant-prefixed namespacing convention as the other key builders.
    return ":".join((tenant_id, "tool_confirm", thread_id))