From bcbac5c8b3ca01fa216fe3fca0884a8f21c1fd3a Mon Sep 17 00:00:00 2001
From: Adolfo Delorenzo
Date: Tue, 3 Jun 2025 09:16:41 -0600
Subject: [PATCH] Add multi-GPU support for Docker deployments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Created separate docker-compose override files for different GPU types:
  - docker-compose.nvidia.yml for NVIDIA GPUs
  - docker-compose.amd.yml for AMD GPUs with ROCm
  - docker-compose.apple.yml for Apple Silicon
- Updated README with GPU-specific Docker configurations
- Updated deployment instructions to use the appropriate override files
- Added detailed configurations for each GPU type, including:
  - Device mappings and drivers
  - Environment variables
  - Platform specifications
  - Memory and resource limits

This allows users to easily deploy Talk2Me on their specific GPU hardware.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 README.md                 | 95 +++++++++++++++++++++++++++++++++++++--
 docker-compose.amd.yml    | 19 ++++++++
 docker-compose.apple.yml  | 11 +++++
 docker-compose.nvidia.yml | 16 +++++++
 4 files changed, 138 insertions(+), 3 deletions(-)
 create mode 100644 docker-compose.amd.yml
 create mode 100644 docker-compose.apple.yml
 create mode 100644 docker-compose.nvidia.yml

diff --git a/README.md b/README.md
index d9dd834..677ab12 100644
--- a/README.md
+++ b/README.md
@@ -280,18 +280,31 @@ curl -X POST -H "X-Admin-Token: $ADMIN_TOKEN" \
 ### Docker Deployment
 
 ```bash
-# Build and run with Docker Compose
+# Build and run with Docker Compose (CPU only)
 docker-compose up -d
 
+# With NVIDIA GPU support
+docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
+
+# With AMD GPU support (ROCm)
+docker-compose -f docker-compose.yml -f docker-compose.amd.yml up -d
+
+# With Apple Silicon support
+docker-compose -f docker-compose.yml -f docker-compose.apple.yml up -d
+
 # Scale web workers
-docker-compose up -d --scale web=4
+docker-compose up -d --scale talk2me=4
 
 # View logs
-docker-compose logs -f web
+docker-compose logs -f talk2me
 ```
 
 ### Docker Compose Configuration
 
+Choose the appropriate configuration based on your GPU:
+
+#### NVIDIA GPU Configuration
+
 ```yaml
 version: '3.8'
 services:
@@ -316,6 +329,82 @@ services:
           capabilities: [gpu]
 ```
 
+#### AMD GPU Configuration (ROCm)
+
+```yaml
+version: '3.8'
+services:
+  web:
+    build: .
+    ports:
+      - "5005:5005"
+    environment:
+      - GUNICORN_WORKERS=4
+      - GUNICORN_THREADS=2
+      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # Adjust for your GPU
+    volumes:
+      - ./logs:/app/logs
+      - whisper-cache:/root/.cache/whisper
+      - /dev/kfd:/dev/kfd  # ROCm KFD interface
+      - /dev/dri:/dev/dri  # Direct Rendering Interface
+    devices:
+      - /dev/kfd
+      - /dev/dri
+    group_add:
+      - video
+      - render
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+```
+
+#### Apple Silicon Configuration
+
+```yaml
+version: '3.8'
+services:
+  web:
+    build: .
+    platform: linux/arm64/v8  # For M1/M2 Macs
+    ports:
+      - "5005:5005"
+    environment:
+      - GUNICORN_WORKERS=4
+      - GUNICORN_THREADS=2
+      - PYTORCH_ENABLE_MPS_FALLBACK=1  # Enable MPS fallback
+    volumes:
+      - ./logs:/app/logs
+      - whisper-cache:/root/.cache/whisper
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+```
+
+#### CPU-Only Configuration
+
+```yaml
+version: '3.8'
+services:
+  web:
+    build: .
+    ports:
+      - "5005:5005"
+    environment:
+      - GUNICORN_WORKERS=4
+      - GUNICORN_THREADS=2
+      - OMP_NUM_THREADS=4  # OpenMP threads for CPU
+    volumes:
+      - ./logs:/app/logs
+      - whisper-cache:/root/.cache/whisper
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+          cpus: '4.0'
+```
+
 ### Nginx Configuration
 
 ```nginx
diff --git a/docker-compose.amd.yml b/docker-compose.amd.yml
new file mode 100644
index 0000000..2c947c1
--- /dev/null
+++ b/docker-compose.amd.yml
@@ -0,0 +1,19 @@
+version: '3.8'
+
+# Docker Compose override for AMD GPU support (ROCm)
+# Usage: docker-compose -f docker-compose.yml -f docker-compose.amd.yml up
+
+services:
+  talk2me:
+    environment:
+      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # Adjust based on your GPU model
+      - ROCR_VISIBLE_DEVICES=0  # Use first GPU
+    volumes:
+      - /dev/kfd:/dev/kfd  # ROCm KFD interface
+      - /dev/dri:/dev/dri  # Direct Rendering Interface
+    devices:
+      - /dev/kfd
+      - /dev/dri
+    group_add:
+      - video  # Required for GPU access
+      - render  # Required for GPU access
\ No newline at end of file
diff --git a/docker-compose.apple.yml b/docker-compose.apple.yml
new file mode 100644
index 0000000..2841eac
--- /dev/null
+++ b/docker-compose.apple.yml
@@ -0,0 +1,11 @@
+version: '3.8'
+
+# Docker Compose override for Apple Silicon
+# Usage: docker-compose -f docker-compose.yml -f docker-compose.apple.yml up
+
+services:
+  talk2me:
+    platform: linux/arm64/v8  # For M1/M2/M3 Macs
+    environment:
+      - PYTORCH_ENABLE_MPS_FALLBACK=1  # Enable Metal Performance Shaders fallback
+      - PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.7  # Memory management for MPS
\ No newline at end of file
diff --git a/docker-compose.nvidia.yml b/docker-compose.nvidia.yml
new file mode 100644
index 0000000..c702e05
--- /dev/null
+++ b/docker-compose.nvidia.yml
@@ -0,0 +1,16 @@
+version: '3.8'
+
+# Docker Compose override for NVIDIA GPU support
+# Usage: docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up
+
+services:
+  talk2me:
+    environment:
+      - CUDA_VISIBLE_DEVICES=0  # Use first GPU
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
\ No newline at end of file
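
A quick way to confirm an override file is taking effect is to check accelerator visibility from inside the running container. The snippet below is only a sketch: it uses the `talk2me` service name from the override files and assumes the image ships Python with PyTorch (which Whisper needs); ROCm builds of PyTorch also report their GPU through `torch.cuda.is_available()`.

```bash
# Bring the stack up with the NVIDIA override (swap in the AMD or Apple file as needed)
docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d

# Sanity check: ask PyTorch inside the container whether it can see a GPU
# (assumes PyTorch is installed in the image; ROCm builds answer via the same call)
docker-compose exec talk2me python -c "import torch; print(torch.cuda.is_available())"

# Tail the logs for the talk2me service defined in the override files
docker-compose logs -f talk2me
```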