# Compose file format version, quoted so it stays a string rather than a float.
# NOTE(review): recent Docker Compose releases reportedly treat this key as
# obsolete — confirm whether legacy docker-compose support is still required
# before removing it.
version: "3.8"

services:
  # GPU variant — for data center / cloud nodes.
  # Started only when the "gpu" profile is enabled. Publishes the server's
  # HTTP and gRPC endpoints on the host and reserves one NVIDIA GPU.
  # It uses the same host-port variables (HTTP_PORT / GRPC_PORT) as
  # onnx-runtime-cpu, so enabling both profiles on one host would collide.
  onnx-runtime-gpu:
    # The tag is overridable so a deployment can pin a known release instead
    # of tracking the mutable "latest" tag; the default preserves the
    # previous behavior.
    image: "mcr.microsoft.com/onnxruntime/server:${ONNX_IMAGE_TAG:-latest}"
    container_name: onnx-runtime-gpu
    restart: unless-stopped
    profiles: ["gpu"]
    ports:
      # host:container — the container side must match the ports in `command`.
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      # Named volume that holds the model file referenced by --model_path.
      - onnx_models:/models
    environment:
      ORT_LOG_LEVEL: "${LOG_LEVEL:-WARNING}"
    # Folded scalar: the lines below are joined with spaces into a single
    # argument string; ">-" drops the trailing newline.
    # NOTE(review): confirm the image's entrypoint accepts --num_threads and
    # --execution_provider.
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider ${EXEC_PROVIDER:-cuda}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve exactly one GPU through the NVIDIA driver.
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU variant — for edge nodes, ARM, resource-constrained environments.
  # Started when either the "cpu" or the "edge" profile is enabled. Runs the
  # server with the CPU execution provider under CPU and memory limits.
  # It uses the same host-port variables (HTTP_PORT / GRPC_PORT) as
  # onnx-runtime-gpu, so enabling both variants on one host would collide.
  onnx-runtime-cpu:
    # The tag is overridable so a deployment can pin a known release instead
    # of tracking the mutable "latest" tag; the default preserves the
    # previous behavior.
    image: "mcr.microsoft.com/onnxruntime/server:${ONNX_IMAGE_TAG:-latest}"
    container_name: onnx-runtime-cpu
    restart: unless-stopped
    profiles: ["cpu", "edge"]
    ports:
      # host:container — the container side must match the ports in `command`.
      - "${HTTP_PORT:-8001}:8001"
      - "${GRPC_PORT:-50051}:50051"
    volumes:
      # Named volume that holds the model file referenced by --model_path.
      - onnx_models:/models
    environment:
      ORT_LOG_LEVEL: "${LOG_LEVEL:-WARNING}"
    # Folded scalar: the lines below are joined with spaces into a single
    # argument string; ">-" drops the trailing newline.
    # NOTE(review): the default NUM_THREADS (4) is higher than the default
    # CPU_LIMIT (2.0) below — consider aligning them. Also confirm the
    # image's entrypoint accepts --num_threads and --execution_provider.
    command: >-
      --model_path /models/${MODEL_FILE:-model.onnx}
      --http_port 8001
      --grpc_port 50051
      --num_threads ${NUM_THREADS:-4}
      --execution_provider cpu
    deploy:
      resources:
        limits:
          # Both values are quoted so the interpolated text stays a string.
          cpus: "${CPU_LIMIT:-2.0}"
          memory: "${MEM_LIMIT:-2G}"

volumes:
  # Named volume mounted at /models by both ONNX Runtime services.
  # No options are set, so it is declared as an explicit empty mapping.
  onnx_models: {}