This commit is contained in:
2026-03-10 14:40:51 -03:00
parent 290f05be87
commit 92713a4d1c
30 changed files with 2074 additions and 0 deletions

View File

@@ -0,0 +1,43 @@
# Docker Compose stack for a single NVIDIA Triton Inference Server instance.
# Models are served from the named volume `triton_models`; all tunables are
# overridable via environment variables with sensible defaults.

# NOTE: `version` is obsolete under Compose V2 (ignored with a warning);
# kept for compatibility with legacy docker-compose v1 tooling.
version: "3.8"

services:
  triton:
    image: nvcr.io/nvidia/tritonserver:${TRITON_VERSION:-24.08}-py3
    container_name: triton-inference-server
    restart: unless-stopped
    ports:
      # Port mappings are quoted strings to avoid YAML's sexagesimal trap.
      - "${HTTP_PORT:-8000}:8000"     # HTTP/REST inference API
      - "${GRPC_PORT:-8001}:8001"     # gRPC inference API
      - "${METRICS_PORT:-8002}:8002"  # Prometheus metrics endpoint
    volumes:
      - triton_models:/models
    # Folded scalar (>) joins these lines into one command string.
    # NOTE(review): --strict-model-config is deprecated in recent Triton
    # releases in favor of --disable-auto-complete-config; kept as-is since
    # the pinned 24.08 image still accepts it — confirm before upgrading.
    command: >
      tritonserver
      --model-repository=/models
      --strict-model-config=${STRICT_CONFIG:-false}
      --log-verbose=${LOG_VERBOSE:-0}
      --exit-on-error=${EXIT_ON_ERROR:-false}
      --rate-limit=${RATE_LIMIT:-off}
      --model-control-mode=${MODEL_CONTROL:-poll}
      --repository-poll-secs=${POLL_INTERVAL:-30}
    environment:
      # NOTE(review): "all" is the NVIDIA_VISIBLE_DEVICES convention;
      # CUDA_VISIBLE_DEVICES normally expects comma-separated device
      # indices (e.g. "0,1") — confirm the intended variable/value.
      - CUDA_VISIBLE_DEVICES=${CUDA_DEVICES:-all}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all host GPUs via the NVIDIA container runtime.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Shared memory sizing matters for Triton's CUDA IPC / shared-memory
    # tensor transport; the 64 MiB Docker default is far too small.
    shm_size: ${SHM_SIZE:-1g}
    ulimits:
      memlock: -1        # unlimited locked memory (pinned host buffers)
      stack: 67108864    # 64 MiB stack, per NVIDIA container guidance
    healthcheck:
      # /v2/health/ready returns 200 only once the server can serve models.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Grace period so probe failures during model loading (which can take
      # minutes for large repositories) don't count toward `retries`.
      start_period: 120s

volumes:
  # Named volume holding the Triton model repository.
  triton_models: