#!/usr/bin/env python3
"""
Moxie RAG Service — FastAPI application.

Multi-collection support for tenant isolation.
"""

import os
import sys
import shutil
import logging
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Optional

import httpx
import uvicorn
from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from rag_engine import RAGEngine
from document_processor import extract_text_from_file, extract_audio_from_video

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
LOG_DIR = Path(os.environ.get("LOG_DIR", "/app/logs"))
LOG_DIR.mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[
        logging.FileHandler(LOG_DIR / "rag_service.log"),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger("moxie-rag")

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
WHISPER_URL = os.environ.get("WHISPER_URL", "http://host.docker.internal:8081/transcribe")
UPLOAD_DIR = Path(os.environ.get("UPLOAD_DIR", "/app/data/uploads"))
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# ---------------------------------------------------------------------------
# Engine
# ---------------------------------------------------------------------------
engine = RAGEngine(data_dir=os.environ.get("CHROMA_DIR", "/app/data/chromadb"))

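# Every path and URL above can be overridden through the environment. A
# minimal sketch of a local run with custom locations (the values below are
# hypothetical, not defaults defined elsewhere):
#
#   LOG_DIR=/tmp/moxie-logs \
#   UPLOAD_DIR=/tmp/moxie-uploads \
#   CHROMA_DIR=/tmp/moxie-chroma \
#   WHISPER_URL=http://localhost:8081/transcribe \
#   python main.py
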
# ---------------------------------------------------------------------------
# FastAPI
# ---------------------------------------------------------------------------
app = FastAPI(
    title="Moxie RAG Service",
    description="Multi-tenant RAG system for document storage and retrieval",
    version="2.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

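# NOTE: wildcard CORS ("*") is convenient on a private network or behind a
# reverse proxy; tighten allow_origins before exposing this service publicly.
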
# ---------------------------------------------------------------------------
# Request / response models
# ---------------------------------------------------------------------------
class IngestRequest(BaseModel):
    content: str
    title: Optional[str] = None
    source: Optional[str] = None
    date: Optional[str] = None
    doc_type: Optional[str] = "text"
    auto_chunk: bool = True
    collection: Optional[str] = None


class QueryRequest(BaseModel):
    question: str
    top_k: int = 5
    filter_type: Optional[str] = None
    collection: Optional[str] = None

# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@app.get("/")
async def root():
    return {
        "service": "Moxie RAG",
        "version": "2.0.0",
        "device": engine.device,
        "model": engine.model_name,
        "collections": engine.list_collections(),
    }


@app.get("/health")
async def health():
    return {
        "status": "ok",
        "device": engine.device,
        "collections": engine.list_collections(),
    }


@app.get("/collections")
async def list_collections():
    """List all collections and their chunk counts."""
    return {"collections": engine.list_collections()}

@app.post("/ingest")
async def ingest_text(req: IngestRequest):
    """Ingest text content into the vector store."""
    if not req.content.strip():
        raise HTTPException(400, "Content cannot be empty")
    try:
        return engine.ingest(
            content=req.content,
            title=req.title or "Untitled",
            source=req.source or "unknown",
            date=req.date,
            doc_type=req.doc_type or "text",
            auto_chunk=req.auto_chunk,
            collection=req.collection,
        )
    except ValueError as exc:
        raise HTTPException(400, str(exc))

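# A minimal client sketch for /ingest (assumes the service is reachable on
# localhost:8899; the title/source/collection values are illustrative):
#
#   import httpx
#   resp = httpx.post("http://localhost:8899/ingest", json={
#       "content": "Quarterly planning notes ...",
#       "title": "Q3 planning",
#       "source": "notes",
#       "collection": "work",
#   })
#   print(resp.json())  # per RAGEngine.ingest usage elsewhere in this file,
#                       # expected to include doc_id and chunks_created
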
@app.post("/ingest-file")
async def ingest_file(
    file: UploadFile = File(...),
    title: Optional[str] = Form(None),
    source: Optional[str] = Form(None),
    date: Optional[str] = Form(None),
    doc_type: Optional[str] = Form(None),
    collection: Optional[str] = Form(None),
):
    """Upload and ingest a document (PDF, DOCX, TXT, MD, XLSX, XLS, CSV)."""
    suffix = Path(file.filename).suffix.lower()
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        content = await file.read()
        tmp.write(content)
        tmp_path = tmp.name

    try:
        text = extract_text_from_file(tmp_path, file.filename)
        if not text.strip():
            raise HTTPException(400, "Could not extract text from file")

        # Keep a copy
        dest = UPLOAD_DIR / f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{file.filename}"
        shutil.copy2(tmp_path, dest)

        return engine.ingest(
            content=text,
            title=title or file.filename,
            source=source or f"file:{file.filename}",
            date=date,
            doc_type=doc_type or suffix.lstrip("."),
            collection=collection,
        )
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    finally:
        os.unlink(tmp_path)

@app.post("/query")
async def query(req: QueryRequest):
    """Semantic search over indexed documents."""
    if not req.question.strip():
        raise HTTPException(400, "Question cannot be empty")
    return engine.query(
        question=req.question,
        top_k=req.top_k,
        filter_type=req.filter_type,
        collection=req.collection,
    )

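# A minimal /query sketch (request shape from QueryRequest above; the result
# structure is whatever RAGEngine.query returns):
#
#   import httpx
#   hits = httpx.post("http://localhost:8899/query", json={
#       "question": "What did we decide about the Q3 roadmap?",
#       "top_k": 3,
#       "collection": "work",
#   }).json()
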
@app.get("/documents")
async def list_documents(collection: Optional[str] = Query(None)):
    """List all indexed documents."""
    return engine.list_documents(collection=collection)


@app.delete("/documents/{doc_id}")
async def delete_document(doc_id: str, collection: Optional[str] = Query(None)):
    """Delete a document and all its chunks."""
    try:
        return engine.delete_document(doc_id, collection=collection)
    except KeyError as exc:
        raise HTTPException(404, str(exc))

@app.post("/transcribe")
async def transcribe(
    file: UploadFile = File(...),
    auto_ingest: bool = Form(False),
    title: Optional[str] = Form(None),
    source: Optional[str] = Form(None),
    language: Optional[str] = Form(None),  # accepted but not currently forwarded to Whisper
    collection: Optional[str] = Form(None),
):
    """Transcribe audio/video via Whisper, optionally auto-ingest the result."""
    suffix = Path(file.filename).suffix.lower()
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        content = await file.read()
        tmp.write(content)
        tmp_path = tmp.name

    audio_path = None
    try:
        # If video, extract audio first
        video_exts = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv"}
        send_path = tmp_path
        if suffix in video_exts:
            logger.info(f"Extracting audio from video: {file.filename}")
            audio_path = extract_audio_from_video(tmp_path)
            send_path = audio_path

        async with httpx.AsyncClient(timeout=600.0) as client:
            with open(send_path, "rb") as audio_file:
                send_name = file.filename if suffix not in video_exts else Path(file.filename).stem + ".wav"
                files = {"file": (send_name, audio_file)}
                resp = await client.post(WHISPER_URL, files=files)

        if resp.status_code != 200:
            raise HTTPException(502, f"Whisper error: {resp.status_code} — {resp.text}")

        result = resp.json()
        transcription = result.get("text", result.get("transcription", ""))

        if not transcription.strip():
            raise HTTPException(400, "Transcription returned empty text")

        response = {
            "filename": file.filename,
            "transcription": transcription,
            "word_count": len(transcription.split()),
        }

        if auto_ingest:
            ingest_result = engine.ingest(
                content=transcription,
                title=title or f"Transcription: {file.filename}",
                source=source or f"audio:{file.filename}",
                doc_type="transcription",
                collection=collection,
            )
            response["ingested"] = True
            response["doc_id"] = ingest_result["doc_id"]
            response["chunks_created"] = ingest_result["chunks_created"]
            response["collection"] = ingest_result["collection"]
        else:
            response["ingested"] = False

        logger.info(f"Transcribed '{file.filename}' ({response['word_count']} words)")
        return response

    finally:
        os.unlink(tmp_path)
        if audio_path and os.path.exists(audio_path):
            os.unlink(audio_path)

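# A curl sketch for /transcribe with auto-ingest (multipart form fields match
# the Form(...) parameters above; the filename is illustrative):
#
#   curl -X POST http://localhost:8899/transcribe \
#       -F "file=@standup.mp4" \
#       -F "auto_ingest=true" \
#       -F "collection=work"
#
# Containers in video_exts are demuxed to audio first; everything else is
# forwarded to the Whisper service as-is.
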
# ---------------------------------------------------------------------------
# Memory Store Integration
# ---------------------------------------------------------------------------
from memory_store import MemoryStore

MEMORY_DB = os.environ.get("MEMORY_DB", "/app/data/memory.db")
memory = MemoryStore(db_path=MEMORY_DB, rag_engine=engine)
logger.info(f"Memory store initialized: {MEMORY_DB}")

# ---------------------------------------------------------------------------
# Memory Request Models
# ---------------------------------------------------------------------------
class SaveObservationRequest(BaseModel):
    user_id: str
    content: str
    type: str = "general"
    title: Optional[str] = None
    session_id: Optional[str] = None
    tool_name: Optional[str] = None
    importance: int = 1
    tags: Optional[list] = None
    metadata: Optional[dict] = None


class QueryMemoryRequest(BaseModel):
    user_id: str
    query: str
    top_k: int = 10
    type: Optional[str] = None
    since: Optional[str] = None
    include_content: bool = False


class GetObservationsRequest(BaseModel):
    user_id: str
    ids: list


class TimelineRequest(BaseModel):
    user_id: str
    around_id: Optional[int] = None
    around_time: Optional[str] = None
    window_minutes: int = 30
    limit: int = 20


class PreferenceRequest(BaseModel):
    user_id: str
    key: str
    value: str

# ---------------------------------------------------------------------------
# Memory Endpoints
# ---------------------------------------------------------------------------
@app.post("/memory/save")
async def save_observation(req: SaveObservationRequest):
    """Save an observation to memory (SQLite + vector embedding)."""
    if not req.content.strip():
        raise HTTPException(400, "Content cannot be empty")
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.save_observation(
        user_id=req.user_id,
        content=req.content,
        obs_type=req.type,
        title=req.title,
        session_id=req.session_id,
        tool_name=req.tool_name,
        importance=req.importance,
        tags=req.tags,
        metadata=req.metadata,
    )

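# A minimal /memory/save sketch (field names from SaveObservationRequest;
# the user_id, type, and tags values are illustrative):
#
#   import httpx
#   httpx.post("http://localhost:8899/memory/save", json={
#       "user_id": "alice",
#       "content": "Prefers short answers with code examples.",
#       "type": "preference",
#       "importance": 3,
#       "tags": ["style"],
#   })
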
@app.post("/memory/query")
async def query_memory(req: QueryMemoryRequest):
    """
    Search memory using hybrid vector + structured search.

    Returns an index by default (progressive disclosure).
    Set include_content=true for full observation content.
    """
    if not req.query.strip():
        raise HTTPException(400, "Query cannot be empty")
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.query_memory(
        user_id=req.user_id,
        query=req.query,
        top_k=req.top_k,
        obs_type=req.type,
        since=req.since,
        include_content=req.include_content,
    )

@app.post("/memory/get")
async def get_observations(req: GetObservationsRequest):
    """Fetch full observation details by IDs."""
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")
    if not req.ids:
        raise HTTPException(400, "ids list cannot be empty")

    return memory.get_observations(
        user_id=req.user_id,
        ids=req.ids,
    )

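# The progressive-disclosure flow pairs /memory/query and /memory/get: query
# first for a compact, token-cheap index, then fetch full content only for
# the IDs that matter. A sketch (user_id and IDs are illustrative; the exact
# index shape is defined by MemoryStore.query_memory):
#
#   import httpx
#   base = "http://localhost:8899"
#   index = httpx.post(f"{base}/memory/query", json={
#       "user_id": "alice",
#       "query": "deployment issues last week",
#       "top_k": 5,
#   }).json()
#   # ...inspect the index, pick the relevant observation IDs...
#   full = httpx.post(f"{base}/memory/get", json={
#       "user_id": "alice",
#       "ids": [12, 47],
#   }).json()
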
@app.post("/memory/timeline")
async def get_timeline(req: TimelineRequest):
    """Get chronological context around a specific observation or time."""
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.get_timeline(
        user_id=req.user_id,
        around_id=req.around_id,
        around_time=req.around_time,
        window_minutes=req.window_minutes,
        limit=req.limit,
    )

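# A timeline sketch: pull chronological context around one observation, with
# the default 30-minute window (the ID is illustrative; semantics are per
# MemoryStore.get_timeline):
#
#   import httpx
#   httpx.post("http://localhost:8899/memory/timeline", json={
#       "user_id": "alice",
#       "around_id": 47,
#   })
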
@app.post("/memory/preference")
async def save_preference(req: PreferenceRequest):
    """Save or update a user preference."""
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.save_preference(
        user_id=req.user_id,
        key=req.key,
        value=req.value,
    )

@app.get("/memory/preferences/{user_id}")
async def get_preferences(user_id: str):
    """Get all preferences for a user."""
    return memory.get_preferences(user_id)


@app.get("/memory/stats/{user_id}")
async def get_memory_stats(user_id: str):
    """Get memory statistics for a user."""
    return memory.get_stats(user_id)


# ---------------------------------------------------------------------------
# Entrypoint (kept last so every route above is registered before serving,
# and so `python main.py` does not re-import the module mid-definition)
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8899, log_level="info")