---
# ONNX Runtime server stack with two mutually exclusive variants selected via
# Compose profiles:
#   docker compose --profile gpu up -d    # onnx-runtime-gpu
#   docker compose --profile cpu up -d    # onnx-runtime-cpu (also: --profile edge)
#
# Both services publish the same host ports (HTTP_PORT / GRPC_PORT), so
# enabling the GPU and CPU profiles at the same time on one host will fail
# with a port conflict unless the variables are overridden per invocation.
#
# Tunables (all optional, defaults shown in the expressions below):
#   HTTP_PORT, GRPC_PORT, LOG_LEVEL, MODEL_FILE, NUM_THREADS,
#   EXEC_PROVIDER (GPU variant only), CPU_LIMIT, MEM_LIMIT (CPU variant only).

# NOTE(review): top-level `version` is kept for older tooling; recent Compose
# releases treat it as informational only.
version: "3.8"

services:
  # GPU variant — for data center / cloud nodes
  onnx-runtime-gpu:
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-gpu
    restart: unless-stopped
    profiles: ["gpu"]
    ports:
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    # Folded scalar (>-): the lines below are joined with single spaces into
    # one command string, with no trailing newline.
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider ${EXEC_PROVIDER:-cuda}
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU device for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU variant — for edge nodes, ARM, resource-constrained environments
  onnx-runtime-cpu:
    image: mcr.microsoft.com/onnxruntime/server:latest
    container_name: onnx-runtime-cpu
    restart: unless-stopped
    profiles: ["cpu", "edge"]
    ports:
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      - onnx_models:/models
    environment:
      - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING}
    # Same flags as the GPU variant, but the execution provider is fixed to
    # `cpu` rather than read from EXEC_PROVIDER.
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider cpu
    deploy:
      resources:
        # Hard caps for constrained hosts; quoted so the substituted values
        # are always treated as strings.
        limits:
          cpus: "${CPU_LIMIT:-2.0}"
          memory: "${MEM_LIMIT:-2G}"

# Named volume shared by both variants; mounted at /models in the container.
volumes:
  onnx_models: