Files
portainer_scripts/ai-templates-0/stacks/onnx-runtime/docker-compose.yml
2026-03-10 14:40:51 -03:00

58 lines
1.5 KiB
YAML

# ONNX Runtime inference server — Compose stack with two mutually exclusive
# variants selected via profiles: "gpu" (CUDA, data-center nodes) and
# "cpu"/"edge" (constrained hosts). Both mount the shared onnx_models volume.
version: "3.8"  # NOTE(review): obsolete under the Compose Specification (v2 ignores it); kept for legacy tooling

services:
  # GPU variant — for data center / cloud nodes
  onnx-runtime-gpu:
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-gpu
    restart: unless-stopped
    profiles: ["gpu"]
    ports:
      # NOTE(review): both variants default to the same host ports (8001/50051).
      # Activating the "gpu" profile together with "cpu"/"edge" will fail with a
      # port conflict — override HTTP_PORT/GRPC_PORT for one of them in that case.
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    # Folded scalar (>): joins to a single command line passed to the server.
    command: >
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider ${EXEC_PROVIDER:-cuda}
    deploy:
      resources:
        reservations:
          devices:
            # Requires the NVIDIA container toolkit on the host.
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU variant — for edge nodes, ARM, resource-constrained environments
  onnx-runtime-cpu:
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-cpu
    restart: unless-stopped
    profiles: ["cpu", "edge"]
    ports:
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    command: >
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider cpu
    deploy:
      resources:
        limits:
          cpus: "${CPU_LIMIT:-2.0}"
          # Quoted so the templated value stays a plain string scalar.
          memory: "${MEM_LIMIT:-2G}"

volumes:
  # Shared model store; place ${MODEL_FILE} here before starting either service.
  onnx_models: