This commit is contained in:
2026-03-10 14:40:51 -03:00
parent 290f05be87
commit 92713a4d1c
30 changed files with 2074 additions and 0 deletions

View File

@@ -0,0 +1,43 @@
# Docker Compose stack for a single NVIDIA Triton Inference Server instance.
# Models are served from the named volume `triton_models`; all tunables are
# overridable via environment variables with sensible defaults.

# NOTE: `version` is obsolete under Compose V2 (ignored with a warning);
# kept for compatibility with legacy docker-compose v1 tooling.
version: "3.8"

services:
  triton:
    image: nvcr.io/nvidia/tritonserver:${TRITON_VERSION:-24.08}-py3
    container_name: triton-inference-server
    restart: unless-stopped
    ports:
      # Port mappings are quoted strings to avoid YAML's sexagesimal trap.
      - "${HTTP_PORT:-8000}:8000"     # HTTP/REST inference API
      - "${GRPC_PORT:-8001}:8001"     # gRPC inference API
      - "${METRICS_PORT:-8002}:8002"  # Prometheus metrics endpoint
    volumes:
      - triton_models:/models
    # Folded scalar (>) joins these lines into one command string.
    # NOTE(review): --strict-model-config is deprecated in recent Triton
    # releases in favor of --disable-auto-complete-config; kept as-is since
    # the pinned 24.08 image still accepts it — confirm before upgrading.
    command: >
      tritonserver
      --model-repository=/models
      --strict-model-config=${STRICT_CONFIG:-false}
      --log-verbose=${LOG_VERBOSE:-0}
      --exit-on-error=${EXIT_ON_ERROR:-false}
      --rate-limit=${RATE_LIMIT:-off}
      --model-control-mode=${MODEL_CONTROL:-poll}
      --repository-poll-secs=${POLL_INTERVAL:-30}
    environment:
      # NOTE(review): "all" is the NVIDIA_VISIBLE_DEVICES convention;
      # CUDA_VISIBLE_DEVICES normally expects comma-separated device
      # indices (e.g. "0,1") — confirm the intended variable/value.
      - CUDA_VISIBLE_DEVICES=${CUDA_DEVICES:-all}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all host GPUs via the NVIDIA container runtime.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Shared memory sizing matters for Triton's CUDA IPC / shared-memory
    # tensor transport; the 64 MiB Docker default is far too small.
    shm_size: ${SHM_SIZE:-1g}
    ulimits:
      memlock: -1        # unlimited locked memory (pinned host buffers)
      stack: 67108864    # 64 MiB stack, per NVIDIA container guidance
    healthcheck:
      # /v2/health/ready returns 200 only once the server can serve models.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Grace period so probe failures during model loading (which can take
      # minutes for large repositories) don't count toward `retries`.
      start_period: 120s

volumes:
  # Named volume holding the Triton model repository.
  triton_models: