Files
portainer_scripts/ai-templates-0/stacks/triton/docker-compose.yml
2026-03-10 14:40:51 -03:00

44 lines
1.1 KiB
YAML

# Docker Compose stack: NVIDIA Triton Inference Server with GPU reservation,
# a named volume as the model repository, and polling model control.
# All tunables are env-overridable with safe defaults (VAR:-default).
version: "3.8"  # NOTE(review): `version` is obsolete in Compose v2 but harmless; kept for compatibility

services:
  triton:
    image: nvcr.io/nvidia/tritonserver:${TRITON_VERSION:-24.08}-py3
    container_name: triton-inference-server
    restart: unless-stopped
    ports:
      - "${HTTP_PORT:-8000}:8000"    # HTTP/REST inference endpoint
      - "${GRPC_PORT:-8001}:8001"    # gRPC inference endpoint
      - "${METRICS_PORT:-8002}:8002" # Prometheus metrics endpoint
    volumes:
      - triton_models:/models        # model repository root (see --model-repository)
    # Folded scalar (>) joins these lines into one command string.
    # NOTE(review): --strict-model-config is deprecated in recent Triton releases
    # in favor of --disable-auto-complete-config — confirm against the pinned
    # TRITON_VERSION before removing.
    command: >
      tritonserver
      --model-repository=/models
      --strict-model-config=${STRICT_CONFIG:-false}
      --log-verbose=${LOG_VERBOSE:-0}
      --exit-on-error=${EXIT_ON_ERROR:-false}
      --rate-limit=${RATE_LIMIT:-off}
      --model-control-mode=${MODEL_CONTROL:-poll}
      --repository-poll-secs=${POLL_INTERVAL:-30}
    environment:
      - CUDA_VISIBLE_DEVICES=${CUDA_DEVICES:-all}
    # GPU access via the Compose device-reservation syntax (requires the
    # NVIDIA Container Toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    shm_size: ${SHM_SIZE:-1g}  # inference backends often need > default 64m shared memory
    ulimits:
      memlock: -1      # unlimited locked memory (pinned/CUDA host buffers)
      stack: 67108864  # 64 MiB stack
    healthcheck:
      # Triton's readiness endpoint; curl ships in the NGC image.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 5

volumes:
  triton_models: