Add multi-GPU support for Docker deployments

- Created separate docker-compose override files for different GPU types: - docker-compose.nvidia.yml for NVIDIA GPUs - docker-compose.amd.yml for AMD GPUs with ROCm - docker-compose.apple.yml for Apple Silicon - Updated README with GPU-specific Docker configurations - Updated deployment instructions to use appropriate override files - Added detailed configurations for each GPU type including: - Device mappings and drivers - Environment variables - Platform specifications - Memory and resource limits This allows users to easily deploy Talk2Me with their specific GPU hardware. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-03 09:16:41 -06:00
parent e5333d8410
commit bcbac5c8b3
4 changed files with 138 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -280,18 +280,31 @@ curl -X POST -H "X-Admin-Token: $ADMIN_TOKEN" \
 ### Docker Deployment
 ```bash
-# Build and run with Docker Compose
+# Build and run with Docker Compose (CPU only)
 docker-compose up -d
 # With NVIDIA GPU support
 docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
 # With AMD GPU support (ROCm)
 docker-compose -f docker-compose.yml -f docker-compose.amd.yml up -d
 # With Apple Silicon support
 docker-compose -f docker-compose.yml -f docker-compose.apple.yml up -d
 # Scale web workers
-docker-compose up -d --scale web=4
+docker-compose up -d --scale talk2me=4
 # View logs
-docker-compose logs -f web
+docker-compose logs -f talk2me
 ```
 ### Docker Compose Configuration
 Choose the appropriate configuration based on your GPU:
 #### NVIDIA GPU Configuration
 ```yaml
 version: '3.8'
 services:
@@ -316,6 +329,82 @@ services:
              capabilities: [gpu]
 ```
 #### AMD GPU Configuration (ROCm)
 ```yaml
 version: '3.8'
 services:
  web:
    build: .
    ports:
      - "5005:5005"
    environment:
      - GUNICORN_WORKERS=4
      - GUNICORN_THREADS=2
      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # Adjust for your GPU
    volumes:
      - ./logs:/app/logs
      - whisper-cache:/root/.cache/whisper
      - /dev/kfd:/dev/kfd  # ROCm KFD interface
      - /dev/dri:/dev/dri  # Direct Rendering Interface
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video
      - render
    deploy:
      resources:
        limits:
          memory: 4G
 ```
 #### Apple Silicon Configuration
 ```yaml
 version: '3.8'
 services:
  web:
    build: .
    platform: linux/arm64/v8  # For M1/M2 Macs
    ports:
      - "5005:5005"
    environment:
      - GUNICORN_WORKERS=4
      - GUNICORN_THREADS=2
      - PYTORCH_ENABLE_MPS_FALLBACK=1  # Enable MPS fallback
    volumes:
      - ./logs:/app/logs
      - whisper-cache:/root/.cache/whisper
    deploy:
      resources:
        limits:
          memory: 4G
 ```
 #### CPU-Only Configuration
 ```yaml
 version: '3.8'
 services:
  web:
    build: .
    ports:
      - "5005:5005"
    environment:
      - GUNICORN_WORKERS=4
      - GUNICORN_THREADS=2
      - OMP_NUM_THREADS=4  # OpenMP threads for CPU
    volumes:
      - ./logs:/app/logs
      - whisper-cache:/root/.cache/whisper
    deploy:
      resources:
        limits:
          memory: 4G
          cpus: '4.0'
 ```
 ### Nginx Configuration
 ```nginx
--- a/docker-compose.amd.yml
+++ b/docker-compose.amd.yml
@@ -0,0 +1,19 @@
 version: '3.8'
 # Docker Compose override for AMD GPU support (ROCm)
 # Usage: docker-compose -f docker-compose.yml -f docker-compose.amd.yml up
 services:
  talk2me:
    environment:
      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # Adjust based on your GPU model
      - ROCR_VISIBLE_DEVICES=0  # Use first GPU
    volumes:
      - /dev/kfd:/dev/kfd  # ROCm KFD interface
      - /dev/dri:/dev/dri  # Direct Rendering Interface
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video  # Required for GPU access
      - render # Required for GPU access
--- a/docker-compose.apple.yml
+++ b/docker-compose.apple.yml
@@ -0,0 +1,11 @@
 version: '3.8'
 # Docker Compose override for Apple Silicon
 # Usage: docker-compose -f docker-compose.yml -f docker-compose.apple.yml up
 services:
  talk2me:
    platform: linux/arm64/v8  # For M1/M2/M3 Macs
    environment:
      - PYTORCH_ENABLE_MPS_FALLBACK=1  # Enable Metal Performance Shaders fallback
      - PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.7  # Memory management for MPS
--- a/docker-compose.nvidia.yml
+++ b/docker-compose.nvidia.yml
@@ -0,0 +1,16 @@
 version: '3.8'
 # Docker Compose override for NVIDIA GPU support
 # Usage: docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up
 services:
  talk2me:
    environment:
      - CUDA_VISIBLE_DEVICES=0  # Use first GPU
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]