# Viewer metadata from the original listing: 37 lines, 930 B, YAML
# Docker Compose definition for a single GPU-backed NVIDIA NIM container.
# Every ${VAR:-default} below is resolved by Compose from the host
# environment (or an .env file), falling back to the default shown.

# NOTE(review): recent Docker Compose releases treat `version` as obsolete
# and ignore it; kept here for older tooling.
version: "3.8"

services:
  nim:
    # Both the image repository path and the tag are overridable.
    # NOTE(review): the default tag is `latest`; set NIM_VERSION to pin a
    # specific release for reproducible deployments.
    image: "nvcr.io/nim/${NIM_MODEL:-meta/llama-3.1-8b-instruct}:${NIM_VERSION:-latest}"
    container_name: nvidia-nim
    restart: unless-stopped
    ports:
      # Host port is configurable; the container side is fixed at 8000.
      - "${NIM_PORT:-8000}:8000"
    volumes:
      # Named volume (declared at the bottom of this file) mounted at
      # /opt/nim/.cache so its contents outlive the container.
      - nim_cache:/opt/nim/.cache
    environment:
      # No default: resolves to an empty string when unset on the host.
      # NOTE(review): consider ${NGC_API_KEY:?message} to fail fast instead.
      - NGC_API_KEY=${NGC_API_KEY}
      # The host-side variable names below differ from the names the
      # container receives (e.g. MAX_MODEL_LEN -> NIM_MAX_MODEL_LEN).
      - NIM_MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
      - NIM_GPU_MEMORY_UTILIZATION=${GPU_MEM_UTIL:-0.9}
      - NIM_MAX_BATCH_SIZE=${MAX_BATCH:-256}
      - NIM_LOG_LEVEL=${LOG_LEVEL:-INFO}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every device exposed by the nvidia driver that
            # offers the `gpu` capability.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Size of the container's shared-memory mount; quoted because it is a
    # templated scalar.
    shm_size: "${SHM_SIZE:-16g}"
    ulimits:
      # -1 applies to both the soft and hard limit (no locked-memory cap).
      memlock: -1
    healthcheck:
      # Runs curl inside the container against the readiness endpoint.
      # NOTE(review): assumes curl is present in the image - confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 10
      # Failures during the first 120s do not count toward `retries`.
      start_period: 120s

volumes:
  # Default-configured named volume; `{}` makes the empty definition explicit.
  nim_cache: {}