- Migration 014: add status/error_message/chunk_count to kb_documents, make agent_id nullable
- Add GOOGLE_CALENDAR to ChannelTypeEnum in tenant.py
- Add brave_api_key, firecrawl_api_key, google_client_id/secret, minio_kb_bucket to config
- Add text extractors for PDF, DOCX, PPTX, XLSX/XLS, CSV, TXT, MD
- Add KB management API router with upload, list, delete, URL ingest, reindex endpoints
- Install pypdf, python-docx, python-pptx, openpyxl, pandas, firecrawl-py, youtube-transcript-api
- Update .env.example with new env vars
- Unit tests: test_extractors.py (10 tests) and test_kb_upload.py (7 tests) all pass
85 lines
2.5 KiB
Python
85 lines
2.5 KiB
Python
"""KB document status columns and agent_id nullable
|
|
|
|
Revision ID: 014
|
|
Revises: 013
|
|
Create Date: 2026-03-26
|
|
|
|
Changes:
|
|
- kb_documents.status TEXT NOT NULL DEFAULT 'processing' (CHECK constraint)
|
|
- kb_documents.error_message TEXT NULL
|
|
- kb_documents.chunk_count INTEGER NULL
|
|
- kb_documents.agent_id DROP NOT NULL (make nullable — KB is per-tenant, not per-agent)
|
|
|
|
Note: google_calendar channel type was added in migration 013.
|
|
This migration is numbered 014 and depends on 013.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Sequence, Union
|
|
|
|
import sqlalchemy as sa
|
|
from alembic import op
|
|
|
|
# Revision identifiers read by Alembic to place this migration in the chain:
# 014 follows 013, with no branch labels and no cross-branch dependencies.
revision: str = "014"
down_revision: Union[str, None] = "013"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
|
|
|
|
|
def upgrade() -> None:
    """Apply revision 014 to the ``kb_documents`` table.

    Adds the ``status``, ``error_message`` and ``chunk_count`` columns,
    creates a CHECK constraint limiting ``status`` to
    ``processing`` / ``ready`` / ``error``, and makes ``agent_id`` nullable.
    """
    table = "kb_documents"

    # ----------------------------------------------------------------------
    # 1. Ingestion-tracking columns, added in the order they are listed.
    # ----------------------------------------------------------------------
    # NOTE(review): rows that already exist will presumably be backfilled
    # with the server default 'processing' — confirm that is the intended
    # status for documents ingested before this migration.
    ingestion_columns = (
        sa.Column(
            "status",
            sa.Text(),
            nullable=False,
            server_default="processing",
            comment="Document ingestion status: processing | ready | error",
        ),
        sa.Column(
            "error_message",
            sa.Text(),
            nullable=True,
            comment="Error details when status='error'",
        ),
        sa.Column(
            "chunk_count",
            sa.Integer(),
            nullable=True,
            comment="Number of chunks created after ingestion",
        ),
    )
    for column in ingestion_columns:
        op.add_column(table, column)

    # Restrict status to the three values named in the column comment.
    op.create_check_constraint(
        "ck_kb_documents_status",
        table,
        "status IN ('processing', 'ready', 'error')",
    )

    # ----------------------------------------------------------------------
    # 2. agent_id becomes optional — KB is per-tenant, not per-agent.
    # ----------------------------------------------------------------------
    op.alter_column(table, "agent_id", nullable=True)
|
|
|
|
|
|
def downgrade() -> None:
    """Revert revision 014 on the ``kb_documents`` table.

    Restores NOT NULL on ``agent_id``, drops the ``status`` CHECK
    constraint, then drops the three columns added by ``upgrade()`` in
    reverse order of their creation.
    """
    table = "kb_documents"

    # NOTE(review): restoring NOT NULL presumably fails if any row was stored
    # with a NULL agent_id while this revision was applied — confirm how such
    # rows should be handled before running the downgrade.
    op.alter_column(table, "agent_id", nullable=False)

    # The constraint references status, so it is removed before the column.
    op.drop_constraint("ck_kb_documents_status", table, type_="check")

    for column_name in ("chunk_count", "error_message", "status"):
        op.drop_column(table, column_name)
|