---
# Docker Compose stack for LocalAI with NVIDIA GPU acceleration (CUDA 12).
# Tunables (via environment / .env):
#   LOCALAI_PORT  - host port for the API (default 8080)
#   THREADS       - CPU threads for inference (default 4)
#   CONTEXT_SIZE  - model context window (default 4096)
version: "3.8"  # quoted: unquoted 3.8 would parse as a YAML float

services:
  localai:
    image: localai/localai:latest-gpu-nvidia-cuda-12
    container_name: localai
    restart: unless-stopped
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of colon-separated digits.
      - "${LOCALAI_PORT:-8080}:8080"
    volumes:
      # Named volume so downloaded models persist across container recreation.
      - localai_models:/build/models
    environment:
      - THREADS=${THREADS:-4}
      - CONTEXT_SIZE=${CONTEXT_SIZE:-4096}
      # Must match the container-side mount path of localai_models above.
      - MODELS_PATH=/build/models
    deploy:
      resources:
        reservations:
          devices:
            # Expose all NVIDIA GPUs to the container (requires the
            # NVIDIA Container Toolkit on the host).
            - driver: nvidia
              count: all
              capabilities: [gpu]

volumes:
  # Bare key (null value) is the Compose idiom for "default volume driver".
  localai_models: