# Docker Compose stack serving an ONNX model via onnxruntime-server.
# Two mutually exclusive variants are selected with Compose profiles:
#   docker compose --profile gpu up -d    # CUDA-capable nodes
#   docker compose --profile cpu up -d    # edge / ARM / constrained nodes
#
# NOTE(review): the top-level `version` key is obsolete under the Compose
# Specification (Compose v2 ignores it with a warning); retained only for
# legacy docker-compose v1 compatibility.
version: "3.8"

services:
  # GPU variant — for data center / cloud nodes
  onnx-runtime-gpu:
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-gpu
    restart: unless-stopped
    profiles: ["gpu"]
    # NOTE(review): both variants bind the same default host ports — run
    # only one profile at a time, or override HTTP_PORT / GRPC_PORT.
    ports:
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    # `>-` folds the flags onto one logical line and strips the trailing
    # newline that plain `>` would keep.
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider ${EXEC_PROVIDER:-cuda}
    deploy:
      resources:
        reservations:
          # Requires the NVIDIA Container Toolkit on the host.
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU variant — for edge nodes, ARM, resource-constrained environments
  onnx-runtime-cpu:
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-cpu
    restart: unless-stopped
    profiles: ["cpu", "edge"]
    ports:
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider cpu
    deploy:
      resources:
        limits:
          cpus: "${CPU_LIMIT:-2.0}"
          memory: ${MEM_LIMIT:-2G}

# Named volume shared by both variants so models survive container rebuilds.
volumes:
  onnx_models: