feat(02-02): audit model, KB model, migration, and audit logger
- AuditEvent ORM model with tenant_id, action_type, latency_ms, metadata
- KnowledgeBaseDocument and KBChunk ORM models for vector KB
- Migration 003: audit_events (immutable via REVOKE), kb_documents, kb_chunks with HNSW index and RLS on all tables
- AuditLogger with log_llm_call, log_tool_call, log_escalation methods
- audit_events immutability enforced at DB level (UPDATE/DELETE rejected)
- [Rule 1 - Bug] Fixed CAST(:metadata AS jsonb) for asyncpg compatibility
This commit is contained in:
99
packages/shared/shared/models/audit.py
Normal file
99
packages/shared/shared/models/audit.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
SQLAlchemy 2.0 ORM model for the immutable audit_events table.
|
||||
|
||||
Design:
|
||||
- Append-only: konstruct_app role has SELECT + INSERT only (enforced via REVOKE in migration)
|
||||
- Tenant-scoped via RLS — every query sees only the current tenant's rows
|
||||
- action_type discriminates between llm_call, tool_invocation, and escalation events
|
||||
|
||||
Important: The DB-level REVOKE UPDATE/DELETE on audit_events means that even if
|
||||
application code accidentally attempts an UPDATE or DELETE, the database will reject it.
|
||||
This is a hard compliance guarantee.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import DateTime, Integer, Text, func
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||||
|
||||
|
||||
class AuditBase(DeclarativeBase):
    """Declarative base dedicated to audit models, kept apart from the tenant Base to avoid conflicts."""
|
||||
|
||||
|
||||
class AuditEvent(AuditBase):
    """
    Immutable record of every LLM call, tool invocation, and escalation event.

    RLS is enabled — rows are scoped to the current tenant via app.current_tenant.
    The konstruct_app role has SELECT + INSERT only — UPDATE and DELETE are revoked.

    action_type values:
        'llm_call'        — LLM completion request/response
        'tool_invocation' — Tool execution (success or failure)
        'escalation'      — Agent handoff to human or another agent

    Attribute naming:
        The JSONB column is called ``metadata`` in the database, but it is exposed
        on the model as ``event_metadata``. ``metadata`` is a reserved attribute
        name in SQLAlchemy's Declarative API (it holds the ``MetaData`` registry of
        the base class); declaring a mapped attribute with that name raises
        ``InvalidRequestError`` as soon as the class is defined.
    """

    __tablename__ = "audit_events"

    # Primary key is generated by the database (gen_random_uuid()).
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        server_default=func.gen_random_uuid(),
    )
    # Owning tenant; indexed because rows are scoped per tenant.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        nullable=False,
        index=True,
    )
    # Optional: not every event is required to carry an agent reference.
    agent_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
    )
    user_id: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
        comment="Channel-native user identifier",
    )
    action_type: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="llm_call | tool_invocation | escalation",
    )
    input_summary: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
        comment="Truncated input for audit readability (not full content)",
    )
    output_summary: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
        comment="Truncated output for audit readability",
    )
    latency_ms: Mapped[int | None] = mapped_column(
        Integer,
        nullable=True,
        comment="Duration of the operation in milliseconds",
    )
    # Python attribute is ``event_metadata``; the explicit first argument keeps
    # the database column name as "metadata" so the schema is unchanged.
    event_metadata: Mapped[dict[str, Any]] = mapped_column(
        "metadata",
        JSONB,
        nullable=False,
        server_default="{}",
        default=dict,
        comment="Additional structured context (model name, tool args hash, etc.)",
    )
    # Timestamp is assigned by the database (now()) at insert time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, action type and tenant."""
        return f"<AuditEvent id={self.id} action={self.action_type} tenant={self.tenant_id}>"
|
||||
134
packages/shared/shared/models/kb.py
Normal file
134
packages/shared/shared/models/kb.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
SQLAlchemy 2.0 ORM models for the Knowledge Base tables.
|
||||
|
||||
Tables:
|
||||
kb_documents — uploaded documents belonging to a tenant/agent
|
||||
kb_chunks — text chunks with vector embeddings for semantic search
|
||||
|
||||
The embedding column uses pgvector's vector(384) type, matching the
|
||||
all-MiniLM-L6-v2 model used for embeddings (same as conversation_embeddings).
|
||||
|
||||
RLS is applied to both tables — each tenant's KB is completely isolated.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, Text, func
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
||||
|
||||
|
||||
class KBBase(DeclarativeBase):
    """Dedicated declarative base for the Knowledge Base ORM models."""
|
||||
|
||||
|
||||
class KnowledgeBaseDocument(KBBase):
    """
    One document uploaded into a tenant's knowledge base.

    Each document is split into KBChunk rows, which are what vector search
    operates on. Tenant isolation is provided by RLS — a tenant only ever
    sees its own documents.
    """

    __tablename__ = "kb_documents"

    # Identity and ownership -------------------------------------------------
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), server_default=func.gen_random_uuid(), primary_key=True
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
    agent_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        index=True,
        nullable=False,
        comment="Agent this document is associated with",
    )

    # Provenance (all optional) ----------------------------------------------
    filename: Mapped[str | None] = mapped_column(
        Text, nullable=True, comment="Original filename if uploaded as a file"
    )
    source_url: Mapped[str | None] = mapped_column(
        Text, nullable=True, comment="Source URL if ingested from the web"
    )
    content_type: Mapped[str | None] = mapped_column(
        Text, nullable=True, comment="MIME type: text/plain, application/pdf, etc."
    )

    # Set by the database (now()) when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # Child chunks; "all, delete-orphan" makes the ORM remove chunks together
    # with their document and when they are detached from it.
    chunks: Mapped[list[KBChunk]] = relationship(
        "KBChunk",
        cascade="all, delete-orphan",
        back_populates="document",
    )

    def __repr__(self) -> str:
        """Return a short debug representation with the document id and tenant."""
        return f"<KnowledgeBaseDocument id={self.id} tenant={self.tenant_id}>"
|
||||
|
||||
|
||||
class KBChunk(KBBase):
    """
    A single text chunk of a knowledge base document, stored with a vector embedding.

    The embedding is a vector(384) column, matching the output dimensions of
    all-MiniLM-L6-v2. Fast cosine similarity search relies on the HNSW index
    created in the migration.

    Tenant isolation is provided by RLS — one tenant's chunks are invisible
    to every other tenant.
    """

    __tablename__ = "kb_chunks"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), server_default=func.gen_random_uuid(), primary_key=True
    )
    tenant_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
    # Parent document; the FK is declared with ON DELETE CASCADE.
    document_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("kb_documents.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    content: Mapped[str] = mapped_column(
        Text, nullable=False, comment="The text content of this chunk"
    )
    # NOTE: the vector(384) ``embedding`` column is intentionally NOT mapped here.
    # It is created with raw DDL in the migration because SQLAlchemy doesn't
    # natively know the pgvector type.
    chunk_index: Mapped[int | None] = mapped_column(
        Integer,
        nullable=True,
        comment="Position of this chunk within its source document (0-indexed)",
    )
    # Set by the database (now()) when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # Back-reference to the owning document (pairs with KnowledgeBaseDocument.chunks).
    document: Mapped[KnowledgeBaseDocument] = relationship(
        "KnowledgeBaseDocument",
        back_populates="chunks",
    )

    def __repr__(self) -> str:
        """Return a short debug representation with chunk id, document id and index."""
        return f"<KBChunk id={self.id} document={self.document_id} idx={self.chunk_index}>"
|
||||
Reference in New Issue
Block a user