This adds a complete production deployment setup using Gunicorn as the WSGI server, replacing Flask's development server.

Key components:
- Gunicorn configuration with optimized worker settings
- Support for sync, threaded, and async (gevent) workers
- Automatic worker recycling to prevent memory leaks
- Increased timeouts for audio processing
- Production-ready logging and monitoring

Deployment options:
1. Docker/Docker Compose for containerized deployment
2. Systemd service for traditional deployment
3. Nginx reverse proxy configuration
4. SSL/TLS support

Production features:
- wsgi.py entry point for WSGI servers
- gunicorn_config.py with production settings
- Dockerfile with multi-stage build
- docker-compose.yml with full stack (Redis, PostgreSQL)
- nginx.conf with caching and security headers
- systemd service with security hardening
- deploy.sh automated deployment script

Configuration:
- .env.production template with all settings
- Support for environment-based configuration
- Separate requirements-prod.txt
- Prometheus metrics endpoint (/metrics)

Monitoring:
- Health check endpoints for liveness/readiness
- Prometheus-compatible metrics
- Structured logging
- Memory usage tracking
- Request counting

Security:
- Non-root user in Docker
- Systemd security restrictions
- Nginx security headers
- File permission hardening
- Resource limits

Documentation:
- Comprehensive PRODUCTION_DEPLOYMENT.md
- Scaling strategies
- Performance tuning guide
- Troubleshooting section

Also fixed memory_manager.py GC stats collection error.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
92 lines
2.3 KiB
YAML
92 lines
2.3 KiB
YAML
# Compose file format version. Recent Docker Compose releases treat this key as
# informational only; it is kept so older docker-compose binaries still parse
# the file as a version 3 document.
version: "3.8"

# Containers that make up the stack. All of them join talk2me_network.
services:
  # Main application container, built from the Dockerfile in this directory.
  talk2me:
    build: .
    container_name: talk2me
    restart: unless-stopped
    ports:
      - "5005:5005"
    environment:
      - FLASK_ENV=production
      - UPLOAD_FOLDER=/tmp/talk2me_uploads
      - LOGS_DIR=/app/logs
      # NOTE(review): inside the container "localhost" is the container itself,
      # so this default only reaches a TTS server running in the same
      # container. Set TTS_SERVER_URL explicitly for an external server.
      - TTS_SERVER_URL=${TTS_SERVER_URL:-http://localhost:5050/v1/audio/speech}
      # Empty when unset; ":-" keeps Compose from warning about a missing
      # variable.
      - TTS_API_KEY=${TTS_API_KEY:-}
      # Secrets deliberately have no fallback value: Compose aborts with the
      # message below when they are unset or empty, instead of silently
      # starting with a well-known placeholder.
      - ADMIN_TOKEN=${ADMIN_TOKEN:?ADMIN_TOKEN must be set}
      - SECRET_KEY=${SECRET_KEY:?SECRET_KEY must be set}
      - GUNICORN_WORKERS=${GUNICORN_WORKERS:-4}
      - GUNICORN_THREADS=${GUNICORN_THREADS:-2}
      # NOTE(review): the 4096 MB default equals the 4G container limit set
      # under deploy.resources below - confirm the application threshold is
      # meant to sit at the hard limit rather than below it.
      - MEMORY_THRESHOLD_MB=${MEMORY_THRESHOLD_MB:-4096}
      - GPU_MEMORY_THRESHOLD_MB=${GPU_MEMORY_THRESHOLD_MB:-2048}
    volumes:
      # Logs are bind-mounted to the host; uploads and models use named volumes.
      - ./logs:/app/logs
      - talk2me_uploads:/tmp/talk2me_uploads
      # NOTE(review): this path is under root's home directory; if the image
      # runs as a non-root user the cache may live elsewhere - confirm against
      # the Dockerfile.
      - talk2me_models:/root/.cache/whisper  # Whisper models cache
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 2G
    healthcheck:
      # NOTE(review): requires curl to be installed in the image - confirm.
      test: ["CMD", "curl", "-f", "http://localhost:5005/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    networks:
      - talk2me_network

  # Nginx reverse proxy (optional, for production)
  nginx:
    image: nginx:alpine
    container_name: talk2me_nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      # Configuration and static assets are mounted read-only.
      - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
      - ./static:/app/static:ro
      - nginx_ssl:/etc/nginx/ssl
    depends_on:
      - talk2me
    networks:
      - talk2me_network

  # Redis for session storage (optional)
  redis:
    image: redis:7-alpine
    container_name: talk2me_redis
    restart: unless-stopped
    # Cap memory at 256 MB and evict least-recently-used keys when full.
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    networks:
      - talk2me_network

  # PostgreSQL for persistent storage (optional)
  postgres:
    image: postgres:15-alpine
    container_name: talk2me_postgres
    restart: unless-stopped
    environment:
      - POSTGRES_DB=talk2me
      - POSTGRES_USER=talk2me
      # No fallback password: Compose aborts when this is unset or empty.
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - talk2me_network

# Named volumes for the stack. Each is declared with an empty definition, so
# Docker creates it with default settings.
volumes:
  # Mounted at /tmp/talk2me_uploads in the talk2me service.
  talk2me_uploads: {}
  # Mounted at /root/.cache/whisper in the talk2me service.
  talk2me_models: {}
  # Mounted at /data in the redis service.
  redis_data: {}
  # Mounted at /var/lib/postgresql/data in the postgres service.
  postgres_data: {}
  # Mounted at /etc/nginx/ssl in the nginx service.
  nginx_ssl: {}

# Single user-defined bridge network that the talk2me, nginx, redis and
# postgres services all attach to.
networks:
  talk2me_network:
    driver: "bridge"