This commit is contained in:
2026-03-10 14:40:51 -03:00
parent 290f05be87
commit 92713a4d1c
30 changed files with 2074 additions and 0 deletions

View File

@@ -0,0 +1,36 @@
---
# Docker Compose definition for one GPU-backed service (`nim`) and the named
# volume it mounts. Every `${VAR:-default}` below is resolved by Compose from
# the shell environment or an `.env` file; the default applies when the
# variable is unset or empty.
version: "3.8"

services:
  nim:
    # Image repository and tag are both overridable via NIM_MODEL / NIM_VERSION.
    # NOTE(review): the default tag is `latest`, so the image that gets pulled
    # can change over time; set NIM_VERSION for reproducible deployments.
    image: "nvcr.io/nim/${NIM_MODEL:-meta/llama-3.1-8b-instruct}:${NIM_VERSION:-latest}"
    container_name: nvidia-nim
    restart: unless-stopped
    ports:
      # Host port is configurable (default 8000); the container side is fixed
      # at 8000.
      - "${NIM_PORT:-8000}:8000"
    volumes:
      # Named volume, so whatever the container writes under /opt/nim/.cache
      # survives container re-creation.
      - nim_cache:/opt/nim/.cache
    environment:
      # NGC_API_KEY has no default: when it is unset, Compose substitutes an
      # empty string and the container starts with an empty key.
      - NGC_API_KEY=${NGC_API_KEY}
      # Host-side variable names (right) differ from the names passed into the
      # container (left).
      - NIM_MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
      - NIM_GPU_MEMORY_UTILIZATION=${GPU_MEM_UTIL:-0.9}
      - NIM_MAX_BATCH_SIZE=${MAX_BATCH:-256}
      - NIM_LOG_LEVEL=${LOG_LEVEL:-INFO}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every GPU exposed by the `nvidia` driver for this
            # service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Size of the container's /dev/shm; overridable via SHM_SIZE.
    shm_size: "${SHM_SIZE:-16g}"
    ulimits:
      # -1 sets both the soft and hard locked-memory limits to unlimited.
      memlock: -1
    healthcheck:
      # Executed inside the container, so `localhost:8000` is the container
      # port, not the published host port.
      # NOTE(review): this requires `curl` to exist in the image - confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 10
      # Probe failures during the first 120s do not count towards `retries`.
      start_period: 120s

volumes:
  # Declared with default settings; mounted by the `nim` service above.
  nim_cache: