Files
portainer_scripts/ai-templates-0/stacks/nvidia-nim/docker-compose.yml
2026-03-10 14:40:51 -03:00

37 lines
930 B
YAML

# Docker Compose stack that runs a single NVIDIA NIM container.
# Every ${VAR:-default} expression below falls back to the value after ":-"
# when the variable is unset or empty in the deployment environment.
version: "3.8"

services:
  nim:
    # Image repository and tag are both overridable; the tag defaults to "latest".
    image: nvcr.io/nim/${NIM_MODEL:-meta/llama-3.1-8b-instruct}:${NIM_VERSION:-latest}
    container_name: nvidia-nim
    restart: unless-stopped
    ports:
      # Host port is configurable; the container side is fixed at 8000.
      - "${NIM_PORT:-8000}:8000"
    volumes:
      # Named volume (declared under the top-level `volumes` key) mounted at
      # the container's cache directory so its contents survive restarts.
      - nim_cache:/opt/nim/.cache
    environment:
      # NGC_API_KEY has no default, so it is passed through empty when unset.
      # NOTE(review): presumably required to pull model assets - confirm, and
      # consider ${NGC_API_KEY:?message} if the stack should fail fast.
      - NGC_API_KEY=${NGC_API_KEY}
      # The NIM_* settings below are fed from shorter, unprefixed variable names.
      - NIM_MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
      - NIM_GPU_MEMORY_UTILIZATION=${GPU_MEM_UTIL:-0.9}
      - NIM_MAX_BATCH_SIZE=${MAX_BATCH:-256}
      - NIM_LOG_LEVEL=${LOG_LEVEL:-INFO}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every GPU exposed by the nvidia driver for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Size of /dev/shm inside the container; quoted so the templated value
    # always reaches Compose as a string.
    shm_size: "${SHM_SIZE:-16g}"
    ulimits:
      # A single value sets both the soft and hard limit; -1 means unlimited.
      memlock: -1
    healthcheck:
      # Probes the readiness endpoint on the container's own port 8000.
      # NOTE(review): relies on `curl` being present in the image - confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 10
      # Failures during the first 120s do not count toward `retries`.
      start_period: 120s

volumes:
  # Named volume with default driver settings, used by the `nim` service.
  nim_cache: {}