v2
This commit is contained in:
36
ai-templates-0/stacks/nvidia-nim/docker-compose.yml
Normal file
36
ai-templates-0/stacks/nvidia-nim/docker-compose.yml
Normal file
@@ -0,0 +1,36 @@
|
||||
# Docker Compose stack that runs a single NVIDIA NIM container with GPU access.
# Tunables use the `${VAR:-default}` form, so each can be overridden from the
# environment (or an .env file); NGC_API_KEY is the only one without a default.
version: "3.8"

services:
  nim:
    # Image repository and tag are both overridable (NIM_MODEL / NIM_VERSION).
    image: nvcr.io/nim/${NIM_MODEL:-meta/llama-3.1-8b-instruct}:${NIM_VERSION:-latest}
    container_name: nvidia-nim
    restart: unless-stopped
    ports:
      # Host port is configurable; the container side is fixed at 8000.
      - "${NIM_PORT:-8000}:8000"
    volumes:
      # Named volume (declared at the bottom of this file) mounted at the
      # container's cache directory.
      - nim_cache:/opt/nim/.cache
    environment:
      # No fallback here: the value is passed through as-is from the host.
      - NGC_API_KEY=${NGC_API_KEY}
      # Note the host-side variable names below (MAX_MODEL_LEN, GPU_MEM_UTIL,
      # MAX_BATCH, LOG_LEVEL) differ from the NIM_* names set in the container.
      - NIM_MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
      - NIM_GPU_MEMORY_UTILIZATION=${GPU_MEM_UTIL:-0.9}
      - NIM_MAX_BATCH_SIZE=${MAX_BATCH:-256}
      - NIM_LOG_LEVEL=${LOG_LEVEL:-INFO}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all devices of the nvidia driver with the gpu capability.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    shm_size: ${SHM_SIZE:-16g}
    ulimits:
      memlock: -1
    healthcheck:
      # Probes the readiness endpoint on the container-internal port 8000.
      # NOTE(review): requires `curl` inside the image - confirm it is present.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 10
      start_period: 120s

volumes:
  nim_cache:
|
||||
Reference in New Issue
Block a user