# Image for the uvicorn-served application `main:app`, listening on port 8899.
# Layers are ordered from least to most frequently changing so that editing
# application code never re-triggers the large dependency/model layers.
FROM python:3.12-slim

# System deps: pdfplumber, ffmpeg for video audio extraction, build tools.
# update + install + list cleanup share one layer so the apt package lists
# never end up in the image and the package index is never stale.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install PyTorch with ROCm support first (big layer, cache it).
# NOTE(review): this pulls unpinned packages from a *nightly* index, so two
# builds at different times can produce different torch versions — consider
# pinning exact versions once a known-good set is identified.
RUN pip install --no-cache-dir \
        torch torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/nightly/rocm7.2/

# Install remaining Python dependencies. Only the manifest is copied here so
# this layer is rebuilt only when requirements.txt itself changes.
# NOTE(review): if requirements.txt constrains torch to a version that the
# ROCm build above does not satisfy, pip may replace it — verify.
COPY app/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the embedding model at build time so startup is fast.
# This runs BEFORE the application code is copied: it only needs the installed
# packages, so source edits no longer invalidate (and re-download) this layer.
# The Python snippet starts right after the opening quote because continuation
# lines keep their leading whitespace, and an indented first statement would
# raise IndentationError; whitespace after each ';' is harmless.
RUN python -c "from sentence_transformers import SentenceTransformer; \
    m = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'); \
    print('Model cached:', m.encode(['test']).shape)"

# Copy application code (most frequently changing layer, kept last).
COPY app/ .

# Documentation only: the port uvicorn binds to in CMD below.
EXPOSE 8899

# Declared after all build-time writes; anything written to these paths by a
# later build step would be discarded.
VOLUME ["/app/data", "/app/logs"]

# NOTE(review): no USER is set, so the server runs as root. Switching to a
# non-root user would require the mounted volumes (and presumably the GPU
# device nodes used by ROCm) to be accessible to that user — confirm before
# changing.
# Exec form so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8899", "--log-level", "info"]