Commit: adds new file ai-templates-0/stacks/triton/docker-compose.yml (43 lines, @@ -0,0 +1,43 @@).
---
# Triton Inference Server stack.
# Every tunable is an env var with a safe default, so `docker compose up`
# works out of the box and a .env file can override anything.
version: "3.8"  # obsolete in Compose v2 (ignored) but kept for older CLIs

services:
  triton:
    image: nvcr.io/nvidia/tritonserver:${TRITON_VERSION:-24.08}-py3
    container_name: triton-inference-server
    restart: unless-stopped
    ports:
      # Host ports are quoted to avoid YAML numeric-parsing traps.
      - "${HTTP_PORT:-8000}:8000"     # HTTP inference API
      - "${GRPC_PORT:-8001}:8001"     # gRPC inference API
      - "${METRICS_PORT:-8002}:8002"  # Prometheus metrics
    volumes:
      - triton_models:/models
    # >- folds the lines into one command string and strips the
    # trailing newline (plain > would keep it).
    command: >-
      tritonserver
      --model-repository=/models
      --strict-model-config=${STRICT_CONFIG:-false}
      --log-verbose=${LOG_VERBOSE:-0}
      --exit-on-error=${EXIT_ON_ERROR:-false}
      --rate-limit=${RATE_LIMIT:-off}
      --model-control-mode=${MODEL_CONTROL:-poll}
      --repository-poll-secs=${POLL_INTERVAL:-30}
    environment:
      - CUDA_VISIBLE_DEVICES=${CUDA_DEVICES:-all}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Service-level settings (siblings of deploy, not children of it).
    shm_size: ${SHM_SIZE:-1g}
    ulimits:
      memlock: -1        # unlimited locked memory for CUDA pinned buffers
      stack: 67108864    # 64 MiB thread stack
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Model loading can take minutes; without a start_period the
      # container is marked unhealthy before Triton finishes starting.
      start_period: 120s

volumes:
  triton_models:
Reference in New Issue
Block a user