Add multi-GPU support for Docker deployments

- Created separate docker-compose override files for different GPU types:
  - docker-compose.nvidia.yml for NVIDIA GPUs
  - docker-compose.amd.yml for AMD GPUs with ROCm
  - docker-compose.apple.yml for Apple Silicon
- Updated README with GPU-specific Docker configurations
- Updated deployment instructions to use appropriate override files
- Added detailed configurations for each GPU type including:
  - Device mappings and drivers
  - Environment variables
  - Platform specifications
  - Memory and resource limits

This allows users to easily deploy Talk2Me with their specific GPU hardware.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Adolfo Delorenzo 2025-06-03 09:16:41 -06:00
parent e5333d8410
commit bcbac5c8b3
4 changed files with 138 additions and 3 deletions

View File

@ -280,18 +280,31 @@ curl -X POST -H "X-Admin-Token: $ADMIN_TOKEN" \
### Docker Deployment
```bash
# Build and run with Docker Compose
# Build and run with Docker Compose (CPU only)
docker-compose up -d
# With NVIDIA GPU support
docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
# With AMD GPU support (ROCm)
docker-compose -f docker-compose.yml -f docker-compose.amd.yml up -d
# With Apple Silicon support
docker-compose -f docker-compose.yml -f docker-compose.apple.yml up -d
# Scale web workers
docker-compose up -d --scale web=4
docker-compose up -d --scale talk2me=4
# View logs
docker-compose logs -f web
docker-compose logs -f talk2me
```
### Docker Compose Configuration
Choose the appropriate configuration based on your GPU:
#### NVIDIA GPU Configuration
```yaml
version: '3.8'
services:
@ -316,6 +329,82 @@ services:
capabilities: [gpu]
```
#### AMD GPU Configuration (ROCm)
```yaml
version: '3.8'
services:
web:
build: .
ports:
- "5005:5005"
environment:
- GUNICORN_WORKERS=4
- GUNICORN_THREADS=2
- HSA_OVERRIDE_GFX_VERSION=10.3.0 # Adjust for your GPU
volumes:
- ./logs:/app/logs
- whisper-cache:/root/.cache/whisper
- /dev/kfd:/dev/kfd # ROCm KFD interface
- /dev/dri:/dev/dri # Direct Rendering Interface
devices:
- /dev/kfd
- /dev/dri
group_add:
- video
- render
deploy:
resources:
limits:
memory: 4G
```
#### Apple Silicon Configuration
```yaml
version: '3.8'
services:
web:
build: .
platform: linux/arm64/v8 # For M1/M2 Macs
ports:
- "5005:5005"
environment:
- GUNICORN_WORKERS=4
- GUNICORN_THREADS=2
- PYTORCH_ENABLE_MPS_FALLBACK=1 # Enable MPS fallback
volumes:
- ./logs:/app/logs
- whisper-cache:/root/.cache/whisper
deploy:
resources:
limits:
memory: 4G
```
#### CPU-Only Configuration
```yaml
version: '3.8'
services:
web:
build: .
ports:
- "5005:5005"
environment:
- GUNICORN_WORKERS=4
- GUNICORN_THREADS=2
- OMP_NUM_THREADS=4 # OpenMP threads for CPU
volumes:
- ./logs:/app/logs
- whisper-cache:/root/.cache/whisper
deploy:
resources:
limits:
memory: 4G
cpus: '4.0'
```
### Nginx Configuration
```nginx

19
docker-compose.amd.yml Normal file
View File

@ -0,0 +1,19 @@
version: '3.8'
# Docker Compose override for AMD GPU support (ROCm)
# Usage: docker-compose -f docker-compose.yml -f docker-compose.amd.yml up
services:
talk2me:
environment:
- HSA_OVERRIDE_GFX_VERSION=10.3.0 # Adjust based on your GPU model
- ROCR_VISIBLE_DEVICES=0 # Use first GPU
volumes:
- /dev/kfd:/dev/kfd # ROCm KFD interface
- /dev/dri:/dev/dri # Direct Rendering Interface
devices:
- /dev/kfd
- /dev/dri
group_add:
- video # Required for GPU access
- render # Required for GPU access

11
docker-compose.apple.yml Normal file
View File

@ -0,0 +1,11 @@
version: '3.8'
# Docker Compose override for Apple Silicon
# Usage: docker-compose -f docker-compose.yml -f docker-compose.apple.yml up
services:
talk2me:
platform: linux/arm64/v8 # For M1/M2/M3 Macs
environment:
- PYTORCH_ENABLE_MPS_FALLBACK=1 # Enable Metal Performance Shaders fallback
- PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.7 # Memory management for MPS

16
docker-compose.nvidia.yml Normal file
View File

@ -0,0 +1,16 @@
version: '3.8'
# Docker Compose override for NVIDIA GPU support
# Usage: docker-compose -f docker-compose.yml -f docker-compose.nvidia.yml up
services:
talk2me:
environment:
- CUDA_VISIBLE_DEVICES=0 # Use first GPU
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]