v2
This commit is contained in:
57
ai-templates-0/stacks/onnx-runtime/docker-compose.yml
Normal file
57
ai-templates-0/stacks/onnx-runtime/docker-compose.yml
Normal file
@@ -0,0 +1,57 @@
|
||||
# ONNX Runtime inference server stack.
#
# Two variants of the same server are defined and gated behind Compose
# profiles; nothing starts unless a profile is selected, e.g.
#   docker compose --profile gpu up -d
#   docker compose --profile cpu up -d
#
# Both variants publish the same host ports (HTTP_PORT / GRPC_PORT), so
# activate only one of them at a time on a given host.
#
# Variables read from the environment / .env file (defaults in parentheses):
#   HTTP_PORT (8001), GRPC_PORT (50051), LOG_LEVEL (WARNING),
#   MODEL_FILE (model.onnx), NUM_THREADS (4), EXEC_PROVIDER (cuda, GPU only),
#   CPU_LIMIT (2.0, CPU only), MEM_LIMIT (2G, CPU only)
version: "3.8"

services:
  # GPU variant — for data center / cloud nodes
  onnx-runtime-gpu:
    # NOTE(review): `latest` is an unpinned tag; consider pinning a release.
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-gpu
    restart: unless-stopped
    profiles: ["gpu"]
    ports:
      # Host side is configurable; the container side is fixed and must
      # match --http_port / --grpc_port in `command` below.
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      # Named volume holding the model file referenced by --model_path.
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    # Folded with `>-` so the arguments form one line with no trailing newline.
    # NOTE(review): confirm these flags against the image's entrypoint.
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider ${EXEC_PROVIDER:-cuda}
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU variant — for edge nodes, ARM, resource-constrained environments
  onnx-runtime-cpu:
    # NOTE(review): `latest` is an unpinned tag; consider pinning a release.
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-cpu
    restart: unless-stopped
    profiles: ["cpu", "edge"]
    ports:
      # Host side is configurable; the container side is fixed and must
      # match --http_port / --grpc_port in `command` below.
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      # Named volume holding the model file referenced by --model_path.
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    # Folded with `>-` so the arguments form one line with no trailing newline.
    # The execution provider is fixed to cpu for this variant.
    # NOTE(review): confirm these flags against the image's entrypoint.
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider cpu
    deploy:
      resources:
        limits:
          # Quoted so the interpolated values stay strings.
          cpus: "${CPU_LIMIT:-2.0}"
          memory: "${MEM_LIMIT:-2G}"

volumes:
  # Shared by both variants; declared with default driver settings.
  onnx_models:
|
||||
Reference in New Issue
Block a user