feat: Add memory system with SQLite + ChromaDB hybrid storage
- memory_store.py: User-isolated observation storage with vector embeddings
- New endpoints: /memory/save, /memory/query, /memory/get, /memory/timeline
- Progressive disclosure pattern for token-efficient retrieval
- Updated Dockerfile to ROCm 7.2 nightly
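A rough usage sketch of the new memory endpoints (the base URL, user_id, and payload values below are illustrative assumptions, not part of the commit; the request fields mirror the models added in app/main.py):

    # Hypothetical client calls against a local instance of the service.
    import httpx

    BASE = "http://localhost:8899"  # assumed host/port, matching the 8899:8899 mapping in docker-compose.yml

    # Save an observation (stored in SQLite, embedded per-user in ChromaDB).
    httpx.post(f"{BASE}/memory/save", json={
        "user_id": "adolfo",  # hypothetical user id
        "content": "Prefers weekly summaries of ingested email attachments",
        "type": "preference_note",
        "importance": 2,
    })

    # Query returns a lightweight index by default (progressive disclosure).
    index = httpx.post(f"{BASE}/memory/query", json={
        "user_id": "adolfo",
        "query": "summary preferences",
        "top_k": 5,
    }).json()

    # Fetch full content only for the observations that look relevant.
    details = httpx.post(f"{BASE}/memory/get", json={
        "user_id": "adolfo",
        "ids": [r["id"] for r in index["results"]],
    }).json()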
.gitignore (vendored, new file, 5 lines)
@@ -0,0 +1,5 @@
data/
logs/
__pycache__/
*.pyc
.env
Dockerfile (new file, 35 lines)
@@ -0,0 +1,35 @@
FROM python:3.12-slim

# System deps: pdfplumber, ffmpeg for video audio extraction, build tools
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install PyTorch with ROCm support first (big layer, cache it)
RUN pip install --no-cache-dir \
    torch torchvision torchaudio \
    --index-url https://download.pytorch.org/whl/nightly/rocm7.2/

# Install remaining Python dependencies
COPY app/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app/ .

# Pre-download the embedding model at build time so startup is fast
RUN python -c "\
from sentence_transformers import SentenceTransformer; \
m = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'); \
print('Model cached:', m.encode(['test']).shape)"

EXPOSE 8899

VOLUME ["/app/data", "/app/logs"]

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8899", "--log-level", "info"]
app/document_processor.py (new file, 168 lines)
@@ -0,0 +1,168 @@
"""
Document processing utilities for the RAG service.
Handles text chunking and extraction from various file formats.
"""

import logging
import subprocess
import tempfile
from pathlib import Path
from typing import List

logger = logging.getLogger("moxie-rag.processor")

# Approximate chars per token for multilingual text
CHARS_PER_TOKEN = 4


def chunk_text(
    text: str,
    chunk_size: int = 500,
    overlap: int = 50,
) -> List[str]:
    """
    Split text into chunks of approximately chunk_size tokens with overlap.
    """
    char_size = chunk_size * CHARS_PER_TOKEN
    char_overlap = overlap * CHARS_PER_TOKEN

    text = text.strip()
    if not text:
        return []

    if len(text) <= char_size:
        return [text]

    chunks = []
    start = 0

    while start < len(text):
        end = start + char_size

        if end < len(text):
            # Prefer to break on a paragraph/sentence boundary in the back half of the window
            window = text[start:end]
            best_break = -1
            for separator in ["\n\n", ".\n", ". ", "?\n", "? ", "!\n", "! ", "\n", ", ", " "]:
                pos = window.rfind(separator)
                if pos > char_size // 2:
                    best_break = pos + len(separator)
                    break
            if best_break > 0:
                end = start + best_break

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)

        next_start = end - char_overlap
        if next_start <= start:
            next_start = end
        start = next_start

    return chunks


def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF file using pdfplumber."""
    import pdfplumber

    text_parts = []
    with pdfplumber.open(file_path) as pdf:
        for i, page in enumerate(pdf.pages):
            page_text = page.extract_text()
            if page_text:
                text_parts.append(page_text)
            else:
                logger.debug(f"Page {i + 1}: no text extracted")

    result = "\n\n".join(text_parts)
    logger.info(f"Extracted {len(result)} chars from PDF ({len(text_parts)} pages)")
    return result


def extract_text_from_docx(file_path: str) -> str:
    """Extract text from a DOCX file using python-docx."""
    from docx import Document

    doc = Document(file_path)
    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
    result = "\n\n".join(paragraphs)
    logger.info(f"Extracted {len(result)} chars from DOCX ({len(paragraphs)} paragraphs)")
    return result


def extract_text_from_excel(file_path: str) -> str:
    """Extract text from Excel files (.xlsx, .xls) using openpyxl/pandas."""
    import pandas as pd

    text_parts = []
    xls = pd.ExcelFile(file_path)

    for sheet_name in xls.sheet_names:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
        if df.empty:
            continue

        text_parts.append(f"--- Sheet: {sheet_name} ---")

        # Include column headers
        headers = " | ".join(str(c) for c in df.columns)
        text_parts.append(f"Columns: {headers}")

        # Convert rows to readable text
        for idx, row in df.iterrows():
            row_text = " | ".join(
                f"{col}: {val}" for col, val in row.items()
                if pd.notna(val) and str(val).strip()
            )
            if row_text:
                text_parts.append(row_text)

    result = "\n".join(text_parts)
    logger.info(f"Extracted {len(result)} chars from Excel ({len(xls.sheet_names)} sheets)")
    return result


def extract_audio_from_video(video_path: str) -> str:
    """Extract audio track from video file using ffmpeg. Returns path to wav file."""
    audio_path = tempfile.mktemp(suffix=".wav")
    try:
        subprocess.run(
            [
                "ffmpeg", "-i", video_path,
                "-vn", "-acodec", "pcm_s16le",
                "-ar", "16000", "-ac", "1",
                "-y", audio_path,
            ],
            capture_output=True,
            check=True,
            timeout=600,
        )
        logger.info(f"Extracted audio from video to {audio_path}")
        return audio_path
    except subprocess.CalledProcessError as e:
        logger.error(f"ffmpeg failed: {e.stderr.decode()}")
        raise ValueError(f"Could not extract audio from video: {e.stderr.decode()[:200]}")


def extract_text_from_file(file_path: str, filename: str) -> str:
    """
    Extract text from a file based on its extension.

    Supported: .pdf, .docx, .doc, .xlsx, .xls, .txt, .md, .csv, .json, .html, .xml, .rst
    """
    ext = Path(filename).suffix.lower()

    if ext == ".pdf":
        return extract_text_from_pdf(file_path)
    elif ext in (".docx", ".doc"):
        return extract_text_from_docx(file_path)
    elif ext in (".xlsx", ".xls"):
        return extract_text_from_excel(file_path)
    elif ext in (".txt", ".md", ".csv", ".json", ".html", ".xml", ".rst"):
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            content = f.read()
        logger.info(f"Read {len(content)} chars from {ext} file")
        return content
    else:
        raise ValueError(f"Unsupported file type: {ext}")
app/email_poller.py (new file, 395 lines)
@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""
Email poller for Zeus RAG — checks zeus@zz11.net via IMAP,
downloads attachments, and ingests them into the RAG service.
Also ingests email body text.
"""

import email
import email.header
import imaplib
import json
import logging
import os
import re
import sys
import tempfile
import time
from datetime import datetime
from email.message import Message
from pathlib import Path

import httpx

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
IMAP_HOST = os.environ.get("IMAP_HOST", "mail.oe74.net")
IMAP_PORT = int(os.environ.get("IMAP_PORT", "993"))
IMAP_USER = os.environ.get("IMAP_USER", "zeus@zz11.net")
IMAP_PASS = os.environ.get("IMAP_PASS", "")
RAG_URL = os.environ.get("RAG_URL", "http://moxie-rag:8899")
RAG_COLLECTION = os.environ.get("RAG_COLLECTION", "")  # empty = default collection
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "60"))  # seconds
STATE_FILE = os.environ.get("STATE_FILE", "/app/data/email_state.json")

# Whitelist of allowed senders (comma-separated email addresses)
ALLOWED_SENDERS = os.environ.get("ALLOWED_SENDERS", "")
ALLOWED_SENDERS_LIST = [s.strip().lower() for s in ALLOWED_SENDERS.split(",") if s.strip()]

SUPPORTED_EXTENSIONS = {
    ".pdf", ".docx", ".doc", ".txt", ".md", ".csv", ".json",
    ".xlsx", ".xls", ".html", ".xml",
}
MEDIA_EXTENSIONS = {
    ".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv",
    ".mp3", ".wav", ".ogg", ".m4a", ".flac", ".aac",
}

LOG_DIR = Path(os.environ.get("LOG_DIR", "/app/logs"))
LOG_DIR.mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[
        logging.FileHandler(LOG_DIR / "email_poller.log"),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger("zeus-email-poller")


# ---------------------------------------------------------------------------
# State management (track processed emails)
# ---------------------------------------------------------------------------
def load_state() -> dict:
    if os.path.exists(STATE_FILE):
        with open(STATE_FILE) as f:
            return json.load(f)
    return {"processed_uids": [], "last_check": None}


def save_state(state: dict):
    Path(STATE_FILE).parent.mkdir(parents=True, exist_ok=True)
    with open(STATE_FILE, "w") as f:
        json.dump(state, f, indent=2)


# ---------------------------------------------------------------------------
# Email processing
# ---------------------------------------------------------------------------
def decode_header_value(value: str) -> str:
    """Decode a MIME-encoded header value."""
    if not value:
        return ""
    parts = email.header.decode_header(value)
    decoded = []
    for part, charset in parts:
        if isinstance(part, bytes):
            decoded.append(part.decode(charset or "utf-8", errors="replace"))
        else:
            decoded.append(part)
    return " ".join(decoded)


def get_email_body(msg: Message) -> str:
    """Extract plain text body from email message."""
    body_parts = []
    if msg.is_multipart():
        for part in msg.walk():
            ctype = part.get_content_type()
            if ctype == "text/plain":
                payload = part.get_payload(decode=True)
                if payload:
                    charset = part.get_content_charset() or "utf-8"
                    body_parts.append(payload.decode(charset, errors="replace"))
            elif ctype == "text/html" and not body_parts:
                # Fallback to HTML if no plain text
                payload = part.get_payload(decode=True)
                if payload:
                    charset = part.get_content_charset() or "utf-8"
                    body_parts.append(payload.decode(charset, errors="replace"))
    else:
        payload = msg.get_payload(decode=True)
        if payload:
            charset = msg.get_content_charset() or "utf-8"
            body_parts.append(payload.decode(charset, errors="replace"))
    return "\n".join(body_parts).strip()


def get_attachments(msg: Message) -> list:
    """Extract attachments from email message."""
    attachments = []
    for part in msg.walk():
        if part.get_content_maintype() == "multipart":
            continue
        filename = part.get_filename()
        if filename:
            filename = decode_header_value(filename)
            payload = part.get_payload(decode=True)
            if payload:
                attachments.append({"filename": filename, "data": payload})
    return attachments


def ingest_text(content: str, title: str, source: str, doc_type: str = "email"):
    """Send text to the RAG ingest endpoint."""
    try:
        payload = {
            "content": content,
            "title": title,
            "source": source,
            "doc_type": doc_type,
            "date": datetime.now().isoformat(),
        }
        if RAG_COLLECTION:
            payload["collection"] = RAG_COLLECTION
        resp = httpx.post(
            f"{RAG_URL}/ingest",
            json=payload,
            timeout=120.0,
        )
        if resp.status_code == 200:
            result = resp.json()
            logger.info(f"Ingested text '{title}': {result.get('chunks_created', 0)} chunks")
            return result
        else:
            logger.error(f"Ingest failed ({resp.status_code}): {resp.text}")
    except Exception as e:
        logger.error(f"Error ingesting text: {e}")
    return None


def ingest_file(filepath: str, filename: str, source: str, doc_type: str = None):
    """Send a file to the RAG ingest-file endpoint."""
    ext = Path(filename).suffix.lower()
    try:
        form_data = {
            "title": filename,
            "source": source,
            "doc_type": doc_type or ext.lstrip("."),
        }
        if RAG_COLLECTION:
            form_data["collection"] = RAG_COLLECTION
        with open(filepath, "rb") as f:
            resp = httpx.post(
                f"{RAG_URL}/ingest-file",
                files={"file": (filename, f)},
                data=form_data,
                timeout=300.0,
            )
        if resp.status_code == 200:
            result = resp.json()
            logger.info(f"Ingested file '{filename}': {result.get('chunks_created', 0)} chunks")
            return result
        else:
            logger.error(f"File ingest failed ({resp.status_code}): {resp.text}")
    except Exception as e:
        logger.error(f"Error ingesting file '{filename}': {e}")
    return None


def transcribe_and_ingest(filepath: str, filename: str, source: str):
    """Send audio/video to transcribe endpoint with auto_ingest=true."""
    try:
        form_data = {
            "auto_ingest": "true",
            "title": f"Transcription: {filename}",
            "source": source,
        }
        if RAG_COLLECTION:
            form_data["collection"] = RAG_COLLECTION
        with open(filepath, "rb") as f:
            resp = httpx.post(
                f"{RAG_URL}/transcribe",
                files={"file": (filename, f)},
                data=form_data,
                timeout=600.0,
            )
        if resp.status_code == 200:
            result = resp.json()
            logger.info(
                f"Transcribed+ingested '{filename}': "
                f"{result.get('word_count', 0)} words, "
                f"{result.get('chunks_created', 0)} chunks"
            )
            return result
        else:
            logger.error(f"Transcribe failed ({resp.status_code}): {resp.text}")
    except Exception as e:
        logger.error(f"Error transcribing '{filename}': {e}")
    return None


def process_email(uid: str, msg: Message) -> dict:
    """Process a single email: extract body and attachments, ingest everything."""
    subject = decode_header_value(msg.get("Subject", "No Subject"))
    sender = decode_header_value(msg.get("From", "Unknown"))
    date_str = msg.get("Date", datetime.now().isoformat())
    source = f"email:{sender}"

    logger.info(f"Processing email UID={uid}: '{subject}' from {sender}")

    # Check sender whitelist
    if ALLOWED_SENDERS_LIST:
        sender_email = sender.lower()
        # Extract email from "Name <email@domain.com>" format
        email_match = re.search(r'<([^>]+)>', sender_email)
        if email_match:
            sender_email = email_match.group(1)

        if sender_email not in ALLOWED_SENDERS_LIST:
            logger.warning(f"Rejecting email from {sender}: not in whitelist")
            return {"uid": uid, "subject": subject, "sender": sender, "rejected": True, "reason": "sender_not_allowed"}

    results = {"uid": uid, "subject": subject, "sender": sender, "ingested": []}

    # 1. Ingest email body
    body = get_email_body(msg)
    if body and len(body.strip()) > 20:
        title = f"Email: {subject}"
        content = f"From: {sender}\nDate: {date_str}\nSubject: {subject}\n\n{body}"
        r = ingest_text(content, title, source, doc_type="email")
        if r:
            results["ingested"].append({"type": "body", "title": title, **r})

    # 2. Process attachments
    attachments = get_attachments(msg)
    for att in attachments:
        filename = att["filename"]
        ext = Path(filename).suffix.lower()

        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp.write(att["data"])
            tmp_path = tmp.name

        try:
            att_source = f"email-attachment:{sender}:{filename}"

            if ext in SUPPORTED_EXTENSIONS:
                r = ingest_file(tmp_path, filename, att_source)
                if r:
                    results["ingested"].append({"type": "file", "filename": filename, **r})

            elif ext in MEDIA_EXTENSIONS:
                r = transcribe_and_ingest(tmp_path, filename, att_source)
                if r:
                    results["ingested"].append({"type": "media", "filename": filename, **r})

            else:
                logger.warning(f"Skipping unsupported attachment: {filename} ({ext})")
        finally:
            os.unlink(tmp_path)

    return results


def check_emails():
    """Connect to IMAP, fetch unread emails, process them."""
    state = load_state()
    processed = set(state.get("processed_uids", []))

    logger.info(f"Connecting to {IMAP_HOST}:{IMAP_PORT} as {IMAP_USER}...")

    try:
        imap = imaplib.IMAP4_SSL(IMAP_HOST, IMAP_PORT)
        imap.login(IMAP_USER, IMAP_PASS)
        imap.select("INBOX")

        # Search for UNSEEN messages
        status, data = imap.search(None, "UNSEEN")
        if status != "OK":
            logger.error(f"IMAP search failed: {status}")
            return

        message_nums = data[0].split()
        if not message_nums:
            logger.info("No new emails.")
            imap.logout()
            return

        logger.info(f"Found {len(message_nums)} unread email(s)")

        for num in message_nums:
            # Get UID
            status, uid_data = imap.fetch(num, "(UID)")
            if status != "OK":
                continue
            uid = uid_data[0].decode().split("UID ")[1].split(")")[0].strip()

            if uid in processed:
                logger.info(f"Skipping already-processed UID={uid}")
                continue

            # Fetch full message
            status, msg_data = imap.fetch(num, "(RFC822)")
            if status != "OK":
                continue

            raw_email = msg_data[0][1]
            msg = email.message_from_bytes(raw_email)

            try:
                result = process_email(uid, msg)
                processed.add(uid)
                total_ingested = len(result.get("ingested", []))
                logger.info(
                    f"Email UID={uid} processed: "
                    f"{total_ingested} item(s) ingested"
                )
            except Exception as e:
                logger.error(f"Error processing UID={uid}: {e}", exc_info=True)

        imap.logout()

    except imaplib.IMAP4.error as e:
        logger.error(f"IMAP error: {e}")
    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)

    # Save state
    state["processed_uids"] = list(processed)[-500:]  # Keep last 500
    state["last_check"] = datetime.now().isoformat()
    save_state(state)


# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------
def main():
    if not IMAP_PASS:
        logger.error("IMAP_PASS not set! Cannot connect to email.")
        sys.exit(1)

    logger.info(f"Email Poller starting — checking {IMAP_USER} every {POLL_INTERVAL}s")
    logger.info(f"RAG endpoint: {RAG_URL}")
    if RAG_COLLECTION:
        logger.info(f"Target collection: {RAG_COLLECTION}")
    else:
        logger.info("Target collection: default")

    # Wait for RAG service to be ready
    for attempt in range(30):
        try:
            resp = httpx.get(f"{RAG_URL}/health", timeout=5.0)
            if resp.status_code == 200:
                logger.info("RAG service is ready!")
                break
        except Exception:
            pass
        logger.info(f"Waiting for RAG service... (attempt {attempt + 1}/30)")
        time.sleep(5)
    else:
        logger.error("RAG service not available after 150s, starting anyway")

    while True:
        try:
            check_emails()
        except Exception as e:
            logger.error(f"Poll cycle error: {e}", exc_info=True)
        time.sleep(POLL_INTERVAL)


if __name__ == "__main__":
    main()
app/main.py (new file, 433 lines)
@@ -0,0 +1,433 @@
#!/usr/bin/env python3
"""
Moxie RAG Service — FastAPI application.
Multi-collection support for tenant isolation.
"""

import os
import sys
import shutil
import logging
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Optional

import httpx
import uvicorn
from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from rag_engine import RAGEngine
from document_processor import extract_text_from_file, extract_audio_from_video

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
LOG_DIR = Path(os.environ.get("LOG_DIR", "/app/logs"))
LOG_DIR.mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[
        logging.FileHandler(LOG_DIR / "rag_service.log"),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger("moxie-rag")

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
WHISPER_URL = os.environ.get("WHISPER_URL", "http://host.docker.internal:8081/transcribe")
UPLOAD_DIR = Path(os.environ.get("UPLOAD_DIR", "/app/data/uploads"))
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# ---------------------------------------------------------------------------
# Engine
# ---------------------------------------------------------------------------
engine = RAGEngine(data_dir=os.environ.get("CHROMA_DIR", "/app/data/chromadb"))

# ---------------------------------------------------------------------------
# FastAPI
# ---------------------------------------------------------------------------
app = FastAPI(
    title="Moxie RAG Service",
    description="Multi-tenant RAG system for document storage and retrieval",
    version="2.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


# ---------------------------------------------------------------------------
# Request / response models
# ---------------------------------------------------------------------------
class IngestRequest(BaseModel):
    content: str
    title: Optional[str] = None
    source: Optional[str] = None
    date: Optional[str] = None
    doc_type: Optional[str] = "text"
    auto_chunk: bool = True
    collection: Optional[str] = None


class QueryRequest(BaseModel):
    question: str
    top_k: int = 5
    filter_type: Optional[str] = None
    collection: Optional[str] = None


# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@app.get("/")
async def root():
    return {
        "service": "Moxie RAG",
        "version": "2.0.0",
        "device": engine.device,
        "model": engine.model_name,
        "collections": engine.list_collections(),
    }


@app.get("/health")
async def health():
    return {
        "status": "ok",
        "device": engine.device,
        "collections": engine.list_collections(),
    }


@app.get("/collections")
async def list_collections():
    """List all collections and their chunk counts."""
    return {"collections": engine.list_collections()}


@app.post("/ingest")
async def ingest_text(req: IngestRequest):
    """Ingest text content into the vector store."""
    if not req.content.strip():
        raise HTTPException(400, "Content cannot be empty")
    try:
        return engine.ingest(
            content=req.content,
            title=req.title or "Untitled",
            source=req.source or "unknown",
            date=req.date,
            doc_type=req.doc_type or "text",
            auto_chunk=req.auto_chunk,
            collection=req.collection,
        )
    except ValueError as exc:
        raise HTTPException(400, str(exc))


@app.post("/ingest-file")
async def ingest_file(
    file: UploadFile = File(...),
    title: Optional[str] = Form(None),
    source: Optional[str] = Form(None),
    date: Optional[str] = Form(None),
    doc_type: Optional[str] = Form(None),
    collection: Optional[str] = Form(None),
):
    """Upload and ingest a document (PDF, DOCX, TXT, MD, XLSX, XLS, CSV)."""
    suffix = Path(file.filename).suffix.lower()
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        content = await file.read()
        tmp.write(content)
        tmp_path = tmp.name

    try:
        text = extract_text_from_file(tmp_path, file.filename)
        if not text.strip():
            raise HTTPException(400, "Could not extract text from file")

        # Keep a copy
        dest = UPLOAD_DIR / f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{file.filename}"
        shutil.copy2(tmp_path, dest)

        return engine.ingest(
            content=text,
            title=title or file.filename,
            source=source or f"file:{file.filename}",
            date=date,
            doc_type=doc_type or suffix.lstrip("."),
            collection=collection,
        )
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    finally:
        os.unlink(tmp_path)


@app.post("/query")
async def query(req: QueryRequest):
    """Semantic search over indexed documents."""
    if not req.question.strip():
        raise HTTPException(400, "Question cannot be empty")
    return engine.query(
        question=req.question,
        top_k=req.top_k,
        filter_type=req.filter_type,
        collection=req.collection,
    )


@app.get("/documents")
async def list_documents(collection: Optional[str] = Query(None)):
    """List all indexed documents."""
    return engine.list_documents(collection=collection)


@app.delete("/documents/{doc_id}")
async def delete_document(doc_id: str, collection: Optional[str] = Query(None)):
    """Delete a document and all its chunks."""
    try:
        return engine.delete_document(doc_id, collection=collection)
    except KeyError as exc:
        raise HTTPException(404, str(exc))


@app.post("/transcribe")
async def transcribe(
    file: UploadFile = File(...),
    auto_ingest: bool = Form(False),
    title: Optional[str] = Form(None),
    source: Optional[str] = Form(None),
    language: Optional[str] = Form(None),
    collection: Optional[str] = Form(None),
):
    """Transcribe audio/video via Whisper, optionally auto-ingest the result."""
    suffix = Path(file.filename).suffix.lower()
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        content = await file.read()
        tmp.write(content)
        tmp_path = tmp.name

    audio_path = None
    try:
        # If video, extract audio first
        video_exts = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv"}
        send_path = tmp_path
        if suffix in video_exts:
            logger.info(f"Extracting audio from video: {file.filename}")
            audio_path = extract_audio_from_video(tmp_path)
            send_path = audio_path

        async with httpx.AsyncClient(timeout=600.0) as client:
            with open(send_path, "rb") as audio_file:
                send_name = file.filename if suffix not in video_exts else Path(file.filename).stem + ".wav"
                files = {"file": (send_name, audio_file)}
                resp = await client.post(WHISPER_URL, files=files)

        if resp.status_code != 200:
            raise HTTPException(502, f"Whisper error: {resp.status_code} — {resp.text}")

        result = resp.json()
        transcription = result.get("text", result.get("transcription", ""))

        if not transcription.strip():
            raise HTTPException(400, "Transcription returned empty text")

        response = {
            "filename": file.filename,
            "transcription": transcription,
            "word_count": len(transcription.split()),
        }

        if auto_ingest:
            ingest_result = engine.ingest(
                content=transcription,
                title=title or f"Transcription: {file.filename}",
                source=source or f"audio:{file.filename}",
                doc_type="transcription",
                collection=collection,
            )
            response["ingested"] = True
            response["doc_id"] = ingest_result["doc_id"]
            response["chunks_created"] = ingest_result["chunks_created"]
            response["collection"] = ingest_result["collection"]
        else:
            response["ingested"] = False

        logger.info(f"Transcribed '{file.filename}' ({response['word_count']} words)")
        return response

    finally:
        os.unlink(tmp_path)
        if audio_path and os.path.exists(audio_path):
            os.unlink(audio_path)


# ---------------------------------------------------------------------------
# Entrypoint
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8899, log_level="info")


# ---------------------------------------------------------------------------
# Memory Store Integration
# ---------------------------------------------------------------------------
from memory_store import MemoryStore

MEMORY_DB = os.environ.get("MEMORY_DB", "/app/data/memory.db")
memory = MemoryStore(db_path=MEMORY_DB, rag_engine=engine)
logger.info(f"Memory store initialized: {MEMORY_DB}")


# ---------------------------------------------------------------------------
# Memory Request Models
# ---------------------------------------------------------------------------
class SaveObservationRequest(BaseModel):
    user_id: str
    content: str
    type: str = "general"
    title: Optional[str] = None
    session_id: Optional[str] = None
    tool_name: Optional[str] = None
    importance: int = 1
    tags: Optional[list] = None
    metadata: Optional[dict] = None


class QueryMemoryRequest(BaseModel):
    user_id: str
    query: str
    top_k: int = 10
    type: Optional[str] = None
    since: Optional[str] = None
    include_content: bool = False


class GetObservationsRequest(BaseModel):
    user_id: str
    ids: list


class TimelineRequest(BaseModel):
    user_id: str
    around_id: Optional[int] = None
    around_time: Optional[str] = None
    window_minutes: int = 30
    limit: int = 20


class PreferenceRequest(BaseModel):
    user_id: str
    key: str
    value: str


# ---------------------------------------------------------------------------
# Memory Endpoints
# ---------------------------------------------------------------------------
@app.post("/memory/save")
async def save_observation(req: SaveObservationRequest):
    """Save an observation to memory (SQLite + vector embedding)."""
    if not req.content.strip():
        raise HTTPException(400, "Content cannot be empty")
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.save_observation(
        user_id=req.user_id,
        content=req.content,
        obs_type=req.type,
        title=req.title,
        session_id=req.session_id,
        tool_name=req.tool_name,
        importance=req.importance,
        tags=req.tags,
        metadata=req.metadata,
    )


@app.post("/memory/query")
async def query_memory(req: QueryMemoryRequest):
    """
    Search memory using hybrid vector + structured search.
    Returns index by default (progressive disclosure).
    Set include_content=true for full observation content.
    """
    if not req.query.strip():
        raise HTTPException(400, "Query cannot be empty")
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.query_memory(
        user_id=req.user_id,
        query=req.query,
        top_k=req.top_k,
        obs_type=req.type,
        since=req.since,
        include_content=req.include_content,
    )


@app.post("/memory/get")
async def get_observations(req: GetObservationsRequest):
    """Fetch full observation details by IDs."""
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")
    if not req.ids:
        raise HTTPException(400, "ids list cannot be empty")

    return memory.get_observations(
        user_id=req.user_id,
        ids=req.ids,
    )


@app.post("/memory/timeline")
async def get_timeline(req: TimelineRequest):
    """Get chronological context around a specific observation or time."""
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.get_timeline(
        user_id=req.user_id,
        around_id=req.around_id,
        around_time=req.around_time,
        window_minutes=req.window_minutes,
        limit=req.limit,
    )


@app.post("/memory/preference")
async def save_preference(req: PreferenceRequest):
    """Save or update a user preference."""
    if not req.user_id.strip():
        raise HTTPException(400, "user_id is required")

    return memory.save_preference(
        user_id=req.user_id,
        key=req.key,
        value=req.value,
    )


@app.get("/memory/preferences/{user_id}")
async def get_preferences(user_id: str):
    """Get all preferences for a user."""
    return memory.get_preferences(user_id)


@app.get("/memory/stats/{user_id}")
async def get_memory_stats(user_id: str):
    """Get memory statistics for a user."""
    return memory.get_stats(user_id)
app/memory_store.py (new file, 378 lines)
@@ -0,0 +1,378 @@
#!/usr/bin/env python3
"""
Memory Store — SQLite + ChromaDB hybrid for agent observations.
Provides structured storage with vector search.
"""

import sqlite3
import hashlib
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any
from contextlib import contextmanager

logger = logging.getLogger("moxie-rag.memory")


class MemoryStore:
    """SQLite-backed memory store with ChromaDB integration."""

    def __init__(self, db_path: str, rag_engine):
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.rag_engine = rag_engine
        self._init_db()

    @contextmanager
    def _get_conn(self):
        """Thread-safe connection context manager."""
        conn = sqlite3.connect(str(self.db_path), timeout=30.0)
        conn.row_factory = sqlite3.Row
        try:
            yield conn
        finally:
            conn.close()

    def _init_db(self):
        """Initialize the SQLite schema."""
        with self._get_conn() as conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS observations (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    type TEXT NOT NULL,
                    title TEXT,
                    content TEXT NOT NULL,
                    content_hash TEXT UNIQUE,
                    embedding_id TEXT,
                    session_id TEXT,
                    tool_name TEXT,
                    importance INTEGER DEFAULT 1,
                    tags TEXT,
                    metadata TEXT
                );

                CREATE INDEX IF NOT EXISTS idx_obs_user ON observations(user_id);
                CREATE INDEX IF NOT EXISTS idx_obs_type ON observations(type);
                CREATE INDEX IF NOT EXISTS idx_obs_timestamp ON observations(timestamp);
                CREATE INDEX IF NOT EXISTS idx_obs_session ON observations(session_id);

                CREATE TABLE IF NOT EXISTS preferences (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    key TEXT NOT NULL,
                    value TEXT NOT NULL,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(user_id, key)
                );

                CREATE INDEX IF NOT EXISTS idx_pref_user ON preferences(user_id);

                CREATE TABLE IF NOT EXISTS relationships (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    observation_id INTEGER,
                    related_id INTEGER,
                    relation_type TEXT,
                    FOREIGN KEY (observation_id) REFERENCES observations(id),
                    FOREIGN KEY (related_id) REFERENCES observations(id)
                );
            """)
            conn.commit()
        logger.info(f"Memory store initialized at {self.db_path}")

    def _content_hash(self, content: str) -> str:
        """Generate hash for deduplication."""
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    def _get_collection_name(self, user_id: str) -> str:
        """Get ChromaDB collection name for user."""
        return f"moxie_memory_{user_id}"

    def save_observation(
        self,
        user_id: str,
        content: str,
        obs_type: str = "general",
        title: Optional[str] = None,
        session_id: Optional[str] = None,
        tool_name: Optional[str] = None,
        importance: int = 1,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """
        Save an observation to SQLite and embed in ChromaDB.
        Returns the observation ID and embedding status.
        """
        content_hash = self._content_hash(content)
        collection = self._get_collection_name(user_id)

        # Check for duplicate
        with self._get_conn() as conn:
            existing = conn.execute(
                "SELECT id FROM observations WHERE content_hash = ?",
                (content_hash,)
            ).fetchone()

            if existing:
                return {
                    "status": "duplicate",
                    "observation_id": existing["id"],
                    "message": "Observation already exists"
                }

        # Embed in ChromaDB
        embed_result = self.rag_engine.ingest(
            content=content,
            title=title or f"Observation: {obs_type}",
            source=f"memory:{user_id}:{obs_type}",
            doc_type="observation",
            collection=collection,
        )
        embedding_id = embed_result.get("doc_id")

        # Store in SQLite
        tags_str = ",".join(tags) if tags else None
        metadata_str = str(metadata) if metadata else None

        with self._get_conn() as conn:
            cursor = conn.execute("""
                INSERT INTO observations
                    (user_id, type, title, content, content_hash, embedding_id,
                     session_id, tool_name, importance, tags, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                user_id, obs_type, title, content, content_hash, embedding_id,
                session_id, tool_name, importance, tags_str, metadata_str
            ))
            conn.commit()
            obs_id = cursor.lastrowid

        logger.info(f"Saved observation #{obs_id} for user {user_id} (type: {obs_type})")
        return {
            "status": "created",
            "observation_id": obs_id,
            "embedding_id": embedding_id,
            "collection": collection,
        }

    def query_memory(
        self,
        user_id: str,
        query: str,
        top_k: int = 10,
        obs_type: Optional[str] = None,
        since: Optional[str] = None,
        include_content: bool = False,
    ) -> Dict[str, Any]:
        """
        Search memory using hybrid SQLite + vector search.
        Progressive disclosure: returns index by default, full content if requested.
        """
        collection = self._get_collection_name(user_id)

        # Vector search in ChromaDB
        vector_results = self.rag_engine.query(
            question=query,
            top_k=top_k * 2,  # Get more for filtering
            collection=collection,
        )

        # Get observation IDs from embedding IDs
        embedding_ids = [r.get("metadata", {}).get("doc_id") for r in vector_results.get("results", [])]

        if not embedding_ids:
            return {"results": [], "total": 0, "query": query}

        # Fetch from SQLite with filters
        placeholders = ",".join(["?" for _ in embedding_ids])
        sql = f"""
            SELECT id, user_id, timestamp, type, title, importance, tags, tool_name
                   {"" if not include_content else ", content"}
            FROM observations
            WHERE user_id = ? AND embedding_id IN ({placeholders})
        """
        params = [user_id] + embedding_ids

        if obs_type:
            sql += " AND type = ?"
            params.append(obs_type)

        if since:
            sql += " AND timestamp >= ?"
            params.append(since)

        sql += " ORDER BY timestamp DESC LIMIT ?"
        params.append(top_k)

        with self._get_conn() as conn:
            rows = conn.execute(sql, params).fetchall()

        results = []
        for row in rows:
            item = {
                "id": row["id"],
                "timestamp": row["timestamp"],
                "type": row["type"],
                "title": row["title"],
                "importance": row["importance"],
                "tags": row["tags"].split(",") if row["tags"] else [],
                "tool_name": row["tool_name"],
            }
            if include_content:
                item["content"] = row["content"]
            results.append(item)

        return {
            "results": results,
            "total": len(results),
            "query": query,
            "collection": collection,
        }

    def get_observations(
        self,
        user_id: str,
        ids: List[int],
    ) -> Dict[str, Any]:
        """Fetch full observation details by IDs."""
        if not ids:
            return {"observations": []}

        placeholders = ",".join(["?" for _ in ids])
        sql = f"""
            SELECT * FROM observations
            WHERE user_id = ? AND id IN ({placeholders})
            ORDER BY timestamp DESC
        """

        with self._get_conn() as conn:
            rows = conn.execute(sql, [user_id] + ids).fetchall()

        observations = []
        for row in rows:
            observations.append({
                "id": row["id"],
                "timestamp": row["timestamp"],
                "type": row["type"],
                "title": row["title"],
                "content": row["content"],
                "importance": row["importance"],
                "tags": row["tags"].split(",") if row["tags"] else [],
                "tool_name": row["tool_name"],
                "session_id": row["session_id"],
                "metadata": row["metadata"],
            })

        return {"observations": observations, "count": len(observations)}

    def get_timeline(
        self,
        user_id: str,
        around_id: Optional[int] = None,
        around_time: Optional[str] = None,
        window_minutes: int = 30,
        limit: int = 20,
    ) -> Dict[str, Any]:
        """Get chronological context around a specific observation or time."""
        with self._get_conn() as conn:
            if around_id:
                # Get timestamp of reference observation
                ref = conn.execute(
                    "SELECT timestamp FROM observations WHERE id = ? AND user_id = ?",
                    (around_id, user_id)
                ).fetchone()
                if not ref:
                    return {"error": "Observation not found", "timeline": []}
                center_time = ref["timestamp"]
            elif around_time:
                center_time = around_time
            else:
                center_time = datetime.now().isoformat()

            # Get observations in time window
            rows = conn.execute("""
                SELECT id, timestamp, type, title, importance, tool_name
                FROM observations
                WHERE user_id = ?
                  AND datetime(timestamp) BETWEEN
                      datetime(?, '-' || ? || ' minutes')
                      AND datetime(?, '+' || ? || ' minutes')
                ORDER BY timestamp
                LIMIT ?
            """, (user_id, center_time, window_minutes, center_time, window_minutes, limit)).fetchall()

        timeline = [{
            "id": row["id"],
            "timestamp": row["timestamp"],
            "type": row["type"],
            "title": row["title"],
            "importance": row["importance"],
            "tool_name": row["tool_name"],
        } for row in rows]

        return {
            "timeline": timeline,
            "center_time": center_time,
            "window_minutes": window_minutes,
            "count": len(timeline),
        }

    def save_preference(
        self,
        user_id: str,
        key: str,
        value: str,
    ) -> Dict[str, Any]:
        """Save or update a user preference."""
        with self._get_conn() as conn:
            conn.execute("""
                INSERT INTO preferences (user_id, key, value)
                VALUES (?, ?, ?)
                ON CONFLICT(user_id, key) DO UPDATE SET
                    value = excluded.value,
                    timestamp = CURRENT_TIMESTAMP
            """, (user_id, key, value))
            conn.commit()

        return {"status": "saved", "user_id": user_id, "key": key}

    def get_preferences(self, user_id: str) -> Dict[str, str]:
        """Get all preferences for a user."""
        with self._get_conn() as conn:
            rows = conn.execute(
                "SELECT key, value FROM preferences WHERE user_id = ?",
                (user_id,)
            ).fetchall()

        return {row["key"]: row["value"] for row in rows}

    def get_stats(self, user_id: str) -> Dict[str, Any]:
        """Get memory statistics for a user."""
        with self._get_conn() as conn:
            total = conn.execute(
                "SELECT COUNT(*) as c FROM observations WHERE user_id = ?",
                (user_id,)
            ).fetchone()["c"]

            by_type = conn.execute("""
                SELECT type, COUNT(*) as c
                FROM observations WHERE user_id = ?
                GROUP BY type
            """, (user_id,)).fetchall()

            recent = conn.execute("""
                SELECT COUNT(*) as c FROM observations
                WHERE user_id = ? AND timestamp >= datetime('now', '-7 days')
            """, (user_id,)).fetchone()["c"]

        return {
            "user_id": user_id,
            "total_observations": total,
            "by_type": {row["type"]: row["c"] for row in by_type},
            "last_7_days": recent,
            "collection": self._get_collection_name(user_id),
        }
app/rag_engine.py (new file, 257 lines)
@@ -0,0 +1,257 @@
"""
RAG Engine — ChromaDB + sentence-transformers embedding logic.
Supports multiple collections for tenant isolation.
"""

import hashlib
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import chromadb
from sentence_transformers import SentenceTransformer

from document_processor import chunk_text

logger = logging.getLogger("moxie-rag.engine")

# ---------------------------------------------------------------------------
# Detect best device
# ---------------------------------------------------------------------------
DEVICE = "cpu"
try:
    import torch

    if torch.cuda.is_available():
        DEVICE = "cuda"
        gpu_name = torch.cuda.get_device_name(0) if torch.cuda.device_count() > 0 else "unknown"
        logger.info(f"GPU detected: {gpu_name}")
except ImportError:
    pass

logger.info(f"Embedding device: {DEVICE}")

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
DEFAULT_COLLECTION = "adolfo_docs"


class RAGEngine:
    """Manages embeddings and vector storage with multi-collection support."""

    def __init__(self, data_dir: str = "/app/data/chromadb"):
        Path(data_dir).mkdir(parents=True, exist_ok=True)

        logger.info(f"Loading embedding model '{EMBEDDING_MODEL}' on {DEVICE} ...")
        self.embedder = SentenceTransformer(EMBEDDING_MODEL, device=DEVICE)
        logger.info("Embedding model loaded.")

        self.chroma = chromadb.PersistentClient(path=data_dir)

        # Pre-load default collection
        self._collections: Dict[str, Any] = {}
        self._get_collection(DEFAULT_COLLECTION)
        logger.info(
            f"ChromaDB collection '{DEFAULT_COLLECTION}' ready — "
            f"{self._collections[DEFAULT_COLLECTION].count()} existing chunks."
        )

    # ------------------------------------------------------------------
    # Collection management
    # ------------------------------------------------------------------
    def _get_collection(self, name: str):
        """Get or create a ChromaDB collection by name."""
        if name not in self._collections:
            self._collections[name] = self.chroma.get_or_create_collection(
                name=name,
                metadata={"hnsw:space": "cosine"},
            )
            logger.info(f"Collection '{name}' loaded ({self._collections[name].count()} chunks)")
        return self._collections[name]

    def list_collections(self) -> List[Dict[str, Any]]:
        """List all collections with their document counts."""
        collections = self.chroma.list_collections()
        result = []
        for coll in collections:
            # ChromaDB >= 1.x returns Collection objects, older versions return strings
            name = coll if isinstance(coll, str) else coll.name
            c = self._get_collection(name)
            result.append({"name": name, "chunks": c.count()})
        return result

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @property
    def device(self) -> str:
        return DEVICE

    @property
    def model_name(self) -> str:
        return EMBEDDING_MODEL

    @property
    def doc_count(self) -> int:
        return self._get_collection(DEFAULT_COLLECTION).count()

    def collection_count(self, collection: str = None) -> int:
        return self._get_collection(collection or DEFAULT_COLLECTION).count()

    @staticmethod
    def _make_doc_id(title: str, source: str) -> str:
        raw = f"{title}:{source}:{datetime.now().isoformat()}"
        return hashlib.md5(raw.encode()).hexdigest()[:12]

    def _embed(self, texts: List[str]) -> List[List[float]]:
        return self.embedder.encode(texts, show_progress_bar=False).tolist()

    # ------------------------------------------------------------------
    # Ingest
    # ------------------------------------------------------------------
    def ingest(
        self,
        content: str,
        title: str = "Untitled",
        source: str = "unknown",
        date: Optional[str] = None,
        doc_type: str = "text",
        auto_chunk: bool = True,
        collection: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Chunk, embed, and store content in specified collection."""
        coll = self._get_collection(collection or DEFAULT_COLLECTION)
        doc_id = self._make_doc_id(title, source)
        date = date or datetime.now().isoformat()

        chunks = chunk_text(content) if auto_chunk else [content.strip()]
        chunks = [c for c in chunks if c]
        if not chunks:
            raise ValueError("No content to ingest after processing")

        embeddings = self._embed(chunks)

        ids = [f"{doc_id}_chunk_{i}" for i in range(len(chunks))]
        metadatas = [
            {
                "doc_id": doc_id,
                "title": title,
                "source": source,
                "date": date,
                "doc_type": doc_type,
                "chunk_index": i,
                "total_chunks": len(chunks),
            }
            for i in range(len(chunks))
        ]

        coll.add(
            ids=ids,
            embeddings=embeddings,
            documents=chunks,
            metadatas=metadatas,
        )

        coll_name = collection or DEFAULT_COLLECTION
        logger.info(f"Ingested '{title}' ({len(chunks)} chunks) [{doc_id}] → {coll_name}")
        return {
            "doc_id": doc_id,
            "title": title,
            "chunks_created": len(chunks),
            "total_documents": coll.count(),
            "collection": coll_name,
        }

    # ------------------------------------------------------------------
    # Query
    # ------------------------------------------------------------------
    def query(
        self,
        question: str,
        top_k: int = 5,
        filter_type: Optional[str] = None,
        collection: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Semantic search over indexed chunks in specified collection."""
        coll = self._get_collection(collection or DEFAULT_COLLECTION)

        if coll.count() == 0:
            return {"question": question, "results": [], "total_results": 0, "collection": collection or DEFAULT_COLLECTION}

        query_emb = self._embed([question])
        where = {"doc_type": filter_type} if filter_type else None

        results = coll.query(
            query_embeddings=query_emb,
            n_results=min(top_k, coll.count()),
            where=where,
            include=["documents", "metadatas", "distances"],
        )

        formatted = []
        if results and results["ids"] and results["ids"][0]:
            for i, cid in enumerate(results["ids"][0]):
                formatted.append(
                    {
                        "chunk_id": cid,
                        "content": results["documents"][0][i],
                        "metadata": results["metadatas"][0][i],
                        "distance": results["distances"][0][i],
                    }
                )

        coll_name = collection or DEFAULT_COLLECTION
        logger.info(f"Query [{coll_name}]: '{question}' → {len(formatted)} results")
        return {
            "question": question,
            "results": formatted,
            "total_results": len(formatted),
            "collection": coll_name,
        }

    # ------------------------------------------------------------------
    # Document management
    # ------------------------------------------------------------------
    def list_documents(self, collection: Optional[str] = None) -> Dict[str, Any]:
        """List all indexed documents grouped by doc_id in specified collection."""
        coll = self._get_collection(collection or DEFAULT_COLLECTION)

        if coll.count() == 0:
            return {"documents": [], "total": 0, "collection": collection or DEFAULT_COLLECTION}

        all_data = coll.get(include=["metadatas"])
        docs: Dict[str, Dict] = {}
        for meta in all_data["metadatas"]:
            did = meta.get("doc_id", "unknown")
            if did not in docs:
                docs[did] = {
                    "doc_id": did,
                    "title": meta.get("title", "Unknown"),
                    "source": meta.get("source", "unknown"),
                    "doc_type": meta.get("doc_type", "text"),
                    "date": meta.get("date", "unknown"),
                    "chunk_count": 0,
                }
            docs[did]["chunk_count"] += 1

        return {"documents": list(docs.values()), "total": len(docs), "collection": collection or DEFAULT_COLLECTION}

    def delete_document(self, doc_id: str, collection: Optional[str] = None) -> Dict[str, Any]:
        """Delete all chunks belonging to a document in specified collection."""
        coll = self._get_collection(collection or DEFAULT_COLLECTION)
        all_data = coll.get(include=["metadatas"])
        ids_to_delete = [
            all_data["ids"][i]
            for i, m in enumerate(all_data["metadatas"])
            if m.get("doc_id") == doc_id
        ]

        if not ids_to_delete:
            raise KeyError(f"Document '{doc_id}' not found")

        coll.delete(ids=ids_to_delete)
        logger.info(f"Deleted {doc_id} ({len(ids_to_delete)} chunks)")
        return {"deleted": doc_id, "chunks_removed": len(ids_to_delete)}
app/requirements.txt (new file, 11 lines)
@@ -0,0 +1,11 @@
# PyTorch ROCm installed separately in Dockerfile
fastapi>=0.104.0
uvicorn[standard]>=0.24.0
chromadb>=0.4.0
sentence-transformers>=2.2.0
pdfplumber>=0.10.0
python-docx>=1.0.0
python-multipart>=0.0.6
httpx>=0.25.0
pandas>=2.1.0
openpyxl>=3.1.0
docker-compose.yml (new file, 69 lines)
@@ -0,0 +1,69 @@
services:
  rag:
    build: .
    container_name: moxie-rag
    restart: unless-stopped
    ports:
      - "8899:8899"
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
    environment:
      - WHISPER_URL=http://host.docker.internal:8081/transcribe
      - CHROMA_DIR=/app/data/chromadb
      - UPLOAD_DIR=/app/data/uploads
      - LOG_DIR=/app/logs
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      - "44"
      - "992"
    security_opt:
      - seccomp=unconfined
    extra_hosts:
      - "host.docker.internal:host-gateway"
    ipc: host

  poller-zeus:
    build: .
    container_name: zeus-email-poller
    restart: unless-stopped
    command: python email_poller.py
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
    environment:
      - IMAP_HOST=mail.oe74.net
      - IMAP_PORT=993
      - IMAP_USER=zeus@zz11.net
      - IMAP_PASS=#!nvo@uHR6493
      - RAG_URL=http://moxie-rag:8899
      - RAG_COLLECTION=zeus_docs
      - ALLOWED_SENDERS=isabella.isg@gmail.com
      - POLL_INTERVAL=60
      - STATE_FILE=/app/data/zeus_email_state.json
      - LOG_DIR=/app/logs
    depends_on:
      - rag

  poller-moxie:
    build: .
    container_name: moxie-email-poller
    restart: unless-stopped
    command: python email_poller.py
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
    environment:
      - IMAP_HOST=mail.oe74.net
      - IMAP_PORT=993
      - IMAP_USER=moxie@zz11.net
      - IMAP_PASS=Xn1R#JThrcn0k
      - RAG_URL=http://moxie-rag:8899
      - RAG_COLLECTION=adolfo_docs
      - POLL_INTERVAL=60
      - STATE_FILE=/app/data/moxie_email_state.json
      - LOG_DIR=/app/logs
    depends_on:
      - rag