Add multi-GPU support for Docker deployments

- Created separate docker-compose override files for different GPU types:
  - docker-compose.nvidia.yml for NVIDIA GPUs
  - docker-compose.amd.yml for AMD GPUs with ROCm
  - docker-compose.apple.yml for Apple Silicon
- Updated README with GPU-specific Docker configurations
- Updated deployment instructions to use appropriate override files
- Added detailed configurations for each GPU type including:
  - Device mappings and drivers
  - Environment variables
  - Platform specifications
  - Memory and resource limits

This allows users to easily deploy Talk2Me with their specific GPU hardware.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-03 09:16:41 -06:00 · 2025-06-03 09:16:41 -06:00 · bcbac5c8b3
commit bcbac5c8b3
parent e5333d8410
4 changed files with 138 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -280,18 +280,31 @@ curl -X POST -H "X-Admin-Token: $ADMIN_TOKEN" \
 ### Docker Deployment

 ```bash
-# Build and run with Docker Compose
+# Build and run with Docker Compose (CPU only)
 docker-compose up -d

+# With NVIDIA GPU support
+docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
+
+# With AMD GPU support (ROCm)
+docker-compose -f docker-compose.yml -f docker-compose.amd.yml up -d
+
+# With Apple Silicon support
+docker-compose -f docker-compose.yml -f docker-compose.apple.yml up -d
+
 # Scale web workers
-docker-compose up -d --scale web=4
+docker-compose up -d --scale talk2me=4

 # View logs
-docker-compose logs -f web
+docker-compose logs -f talk2me
 ```

 ### Docker Compose Configuration

+Choose the appropriate configuration based on your GPU:
+
+#### NVIDIA GPU Configuration
+
 ```yaml
 version: '3.8'
 services:
@ -316,6 +329,82 @@ services:
              capabilities: [gpu]
 ```

+#### AMD GPU Configuration (ROCm)
+
+```yaml
+version: '3.8'
+services:
+  talk2me:  # Same service name the docker-compose scale/logs commands use
+    build: .
+    ports:
+      - "5005:5005"
+    environment:
+      - GUNICORN_WORKERS=4
+      - GUNICORN_THREADS=2
+      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # Adjust for your GPU
+    volumes:
+      - ./logs:/app/logs
+      - whisper-cache:/root/.cache/whisper
+    devices:  # Device nodes go here only; no bind mount is needed for them
+      - /dev/kfd  # ROCm KFD interface
+      - /dev/dri  # Direct Rendering Interface
+    group_add:
+      - video
+      - render
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+volumes:
+  whisper-cache: {}  # Declares the named volume the service mounts
+```
+
+#### Apple Silicon Configuration
+
+```yaml
+version: '3.8'
+services:
+  talk2me:  # Same service name the docker-compose scale/logs commands use
+    build: .
+    platform: linux/arm64/v8  # For M1/M2/M3 Macs
+    ports:
+      - "5005:5005"
+    environment:
+      - GUNICORN_WORKERS=4
+      - GUNICORN_THREADS=2
+      - PYTORCH_ENABLE_MPS_FALLBACK=1  # Only effective where MPS exists (native macOS, not inside Docker)
+    volumes:
+      - ./logs:/app/logs
+      - whisper-cache:/root/.cache/whisper
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+volumes:
+  whisper-cache: {}  # Declares the named volume the service mounts
+```
+
+#### CPU-Only Configuration
+
+```yaml
+version: '3.8'
+services:
+  talk2me:  # Same service name the docker-compose scale/logs commands use
+    build: .
+    ports:
+      - "5005:5005"
+    environment:
+      - GUNICORN_WORKERS=4
+      - GUNICORN_THREADS=2
+      - OMP_NUM_THREADS=4  # OpenMP threads for CPU
+    volumes:
+      - ./logs:/app/logs
+      - whisper-cache:/root/.cache/whisper
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+          cpus: '4.0'
+volumes:
+  whisper-cache: {}  # Declares the named volume the service mounts
+```
+
 ### Nginx Configuration

 ```nginx
--- a/docker-compose.amd.yml
+++ b/docker-compose.amd.yml
@ -0,0 +1,19 @@
+version: '3.8'
+
+# Docker Compose override for AMD GPU support (ROCm)
+# Usage: docker-compose -f docker-compose.yml -f docker-compose.amd.yml up
+
+services:
+  talk2me:
+    environment:
+      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # Adjust based on your GPU model
+      - ROCR_VISIBLE_DEVICES=0  # Use first GPU
+    # Expose the GPU through `devices` only; bind-mounting /dev nodes is redundant.
+    devices:
+      - /dev/kfd  # ROCm KFD interface
+      - /dev/dri  # Direct Rendering Interface
+    # Group names are looked up inside the container; if the image lacks one,
+    # use the host's numeric GID instead (see `getent group render`).
+    group_add:
+      - video  # Required for GPU access
+      - render # Required for GPU access
--- a/docker-compose.apple.yml
+++ b/docker-compose.apple.yml
@ -0,0 +1,11 @@
+version: '3.8'
+
+# Apple Silicon override: runs the native arm64 image (Docker has no Metal/MPS, so inference is on CPU)
+# Usage: docker-compose -f docker-compose.yml -f docker-compose.apple.yml up
+
+services:
+  talk2me:
+    platform: linux/arm64/v8  # For M1/M2/M3 Macs
+    environment:
+      - PYTORCH_ENABLE_MPS_FALLBACK=1  # Only takes effect where MPS exists (native macOS)
+      - PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.7  # MPS memory cap; unused without MPS
--- a/docker-compose.nvidia.yml
+++ b/docker-compose.nvidia.yml
@ -0,0 +1,16 @@
+version: '3.8'
+
+# NVIDIA override: reserves one GPU for the talk2me service.
+# Usage: docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up
+
+services:
+  talk2me:
+    environment:
+      CUDA_VISIBLE_DEVICES: '0'  # Restrict CUDA to the first GPU
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: [gpu]
+              count: 1
+              driver: nvidia