v2

2026-03-10 14:40:51 -03:00
parent 290f05be87
commit 92713a4d1c
30 changed files with 2074 additions and 0 deletions
--- a/ai-templates-0/stacks/onnx-runtime/docker-compose.yml
+++ b/ai-templates-0/stacks/onnx-runtime/docker-compose.yml
@@ -0,0 +1,57 @@
+# Compose Specification file; the top-level "version" key is obsolete in Docker Compose v2 (ignored with a warning), so it is omitted.
+
+services:
+  # GPU variant — for data center / cloud nodes; enabled by the "gpu" profile.
+  onnx-runtime-gpu:
+    image: "mcr.microsoft.com/onnxruntime/server:${IMAGE_TAG:-latest}"  # set IMAGE_TAG to pin a release instead of "latest"
+    container_name: onnx-runtime-gpu
+    restart: unless-stopped
+    profiles: ["gpu"]
+    ports:  # publishes the same host ports as onnx-runtime-cpu; run one variant at a time
+      - "${HTTP_PORT:-8001}:8001"
+      - "${GRPC_PORT:-50051}:50051"
+    volumes:
+      - onnx_models:/models  # named volume; --model_path below points inside it
+    environment:
+      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
+    command: >-  # folded into one line; listen ports must match the container side of "ports"
+      --model_path /models/${MODEL_FILE:-model.onnx}
+      --http_port 8001
+      --grpc_port 50051
+      --num_threads ${NUM_THREADS:-4}
+      --execution_provider ${EXEC_PROVIDER:-cuda}
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserves one NVIDIA GPU device for this container
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+
+  # CPU variant — for edge nodes, ARM, resource-constrained environments; enabled by the "cpu" or "edge" profile.
+  onnx-runtime-cpu:
+    image: "mcr.microsoft.com/onnxruntime/server:${IMAGE_TAG:-latest}"  # set IMAGE_TAG to pin a release instead of "latest"
+    container_name: onnx-runtime-cpu
+    restart: unless-stopped
+    profiles: ["cpu", "edge"]
+    ports:  # publishes the same host ports as onnx-runtime-gpu; run one variant at a time
+      - "${HTTP_PORT:-8001}:8001"
+      - "${GRPC_PORT:-50051}:50051"
+    volumes:
+      - onnx_models:/models  # named volume; --model_path below points inside it
+    environment:
+      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
+    command: >-  # folded into one line; listen ports must match the container side of "ports"
+      --model_path /models/${MODEL_FILE:-model.onnx}
+      --http_port 8001
+      --grpc_port 50051
+      --num_threads ${NUM_THREADS:-4}
+      --execution_provider cpu
+    deploy:
+      resources:
+        limits:  # NOTE(review): default NUM_THREADS (4) is higher than the default CPU_LIMIT (2.0) — confirm intended
+          cpus: "${CPU_LIMIT:-2.0}"
+          memory: "${MEM_LIMIT:-2G}"
+
+volumes:
+  onnx_models: {}  # named volume with default settings; mounted at /models by both runtime services