From 92fd390866c19922d725b86894f8119612899509 Mon Sep 17 00:00:00 2001 From: Adolfo Delorenzo Date: Tue, 3 Jun 2025 08:49:32 -0600 Subject: [PATCH] Add production WSGI server - Flask dev server unsuitable for production load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a complete production deployment setup using Gunicorn as the WSGI server, replacing Flask's development server. Key components: - Gunicorn configuration with optimized worker settings - Support for sync, threaded, and async (gevent) workers - Automatic worker recycling to prevent memory leaks - Increased timeouts for audio processing - Production-ready logging and monitoring Deployment options: 1. Docker/Docker Compose for containerized deployment 2. Systemd service for traditional deployment 3. Nginx reverse proxy configuration 4. SSL/TLS support Production features: - wsgi.py entry point for WSGI servers - gunicorn_config.py with production settings - Dockerfile (single-stage build, runs as non-root user) - docker-compose.yml with full stack (Redis, PostgreSQL) - nginx.conf with caching and security headers - systemd service with security hardening - deploy.sh automated deployment script Configuration: - .env.production template with all settings - Support for environment-based configuration - Separate requirements-prod.txt - Prometheus metrics endpoint (/metrics) Monitoring: - Health check endpoints for liveness/readiness - Prometheus-compatible metrics - Structured logging - Memory usage tracking - Request counting Security: - Non-root user in Docker - Systemd security restrictions - Nginx security headers - File permission hardening - Resource limits Documentation: - Comprehensive PRODUCTION_DEPLOYMENT.md - Scaling strategies - Performance tuning guide - Troubleshooting section Also fixed memory_manager.py GC stats collection error. 
🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .dockerignore | 71 +++++++ Dockerfile | 46 +++++ PRODUCTION_DEPLOYMENT.md | 435 +++++++++++++++++++++++++++++++++++++++ README.md | 16 ++ app.py | 44 ++++ deploy.sh | 208 +++++++++++++++++++ docker-compose.yml | 92 +++++++++ gunicorn_config.py | 86 ++++++++ memory_manager.py | 6 +- nginx.conf | 108 ++++++++++ requirements-prod.txt | 27 +++ talk2me.service | 66 ++++++ wsgi.py | 34 +++ 13 files changed, 1237 insertions(+), 2 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 PRODUCTION_DEPLOYMENT.md create mode 100755 deploy.sh create mode 100644 docker-compose.yml create mode 100644 gunicorn_config.py create mode 100644 nginx.conf create mode 100644 requirements-prod.txt create mode 100644 talk2me.service create mode 100644 wsgi.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..8fc4ca4 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,71 @@ +# Git +.git +.gitignore + +# Python +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +venv/ +env/ +.venv +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +*.egg-info/ +.pytest_cache/ + +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Project specific +logs/ +*.log +.env +.env.* +!.env.production +*.db +*.sqlite +/tmp +/temp +test_*.py +tests/ + +# Documentation +*.md +!README.md +docs/ + +# CI/CD +.github/ +.gitlab-ci.yml +.travis.yml + +# Development files +deploy.sh +Makefile +docker-compose.override.yml \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..56dfdf2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,46 @@ +# Production Dockerfile for Talk2Me +FROM python:3.10-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + ffmpeg \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user +RUN useradd -m -u 1000 talk2me + +# Set working directory +WORKDIR /app + +# Copy requirements first for better caching +COPY requirements.txt requirements-prod.txt ./ +RUN pip install --no-cache-dir -r requirements-prod.txt + +# Copy application code +COPY --chown=talk2me:talk2me . . + +# Create necessary directories +RUN mkdir -p logs /tmp/talk2me_uploads && \ + chown -R talk2me:talk2me logs /tmp/talk2me_uploads + +# Switch to non-root user +USER talk2me + +# Set environment variables +ENV FLASK_ENV=production \ + PYTHONUNBUFFERED=1 \ + UPLOAD_FOLDER=/tmp/talk2me_uploads \ + LOGS_DIR=/app/logs + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ + CMD curl -f http://localhost:5005/health || exit 1 + +# Expose port +EXPOSE 5005 + +# Run with gunicorn +CMD ["gunicorn", "--config", "gunicorn_config.py", "wsgi:application"] \ No newline at end of file diff --git a/PRODUCTION_DEPLOYMENT.md b/PRODUCTION_DEPLOYMENT.md new file mode 100644 index 0000000..2de2481 --- /dev/null +++ b/PRODUCTION_DEPLOYMENT.md @@ -0,0 +1,435 @@ +# Production Deployment Guide + +This guide covers deploying Talk2Me in a production environment using a proper WSGI server. 
+ +## Overview + +The Flask development server is not suitable for production use. This guide covers: +- Gunicorn as the WSGI server +- Nginx as a reverse proxy +- Docker for containerization +- Systemd for process management +- Security best practices + +## Quick Start with Docker + +### 1. Using Docker Compose + +```bash +# Clone the repository +git clone https://github.com/your-repo/talk2me.git +cd talk2me + +# Create .env file with production settings +cat > .env < backup.sql + +# Redis +redis-cli BGSAVE +``` + +### Application Backup + +```bash +# Backup application and logs +tar -czf talk2me-backup.tar.gz \ + /opt/talk2me \ + /var/log/talk2me \ + /etc/systemd/system/talk2me.service \ + /etc/nginx/sites-available/talk2me +``` + +## Troubleshooting + +### Service Won't Start + +```bash +# Check service status +systemctl status talk2me + +# Check logs +journalctl -u talk2me -n 100 + +# Test configuration +sudo -u talk2me /opt/talk2me/venv/bin/gunicorn --check-config wsgi:application +``` + +### High Memory Usage + +```bash +# Trigger cleanup +curl -X POST -H "X-Admin-Token: token" http://localhost:5005/admin/memory/cleanup + +# Restart workers +systemctl reload talk2me +``` + +### Slow Response Times + +1. Check worker count +2. Enable async workers +3. Check GPU availability +4. Review nginx buffering settings + +## Performance Optimization + +### 1. Enable GPU + +Ensure CUDA/ROCm is properly installed: + +```bash +# Check GPU +nvidia-smi # or rocm-smi + +# Set in environment +export CUDA_VISIBLE_DEVICES=0 +``` + +### 2. Optimize Workers + +```python +# For CPU-heavy workloads +workers = cpu_count() +threads = 1 + +# For I/O-heavy workloads +workers = cpu_count() * 2 +threads = 4 +``` + +### 3. Enable Caching + +Use Redis for caching translations: + +```python +CACHE_TYPE = 'redis' +CACHE_REDIS_URL = 'redis://localhost:6379/0' +``` + +## Maintenance + +### Regular Tasks + +1. **Log Rotation**: Configured automatically +2. **Database Cleanup**: Run weekly +3. 
**Model Updates**: Check for Whisper updates +4. **Security Updates**: Keep dependencies updated + +### Update Procedure + +```bash +# Backup first +./backup.sh + +# Update code +git pull + +# Update dependencies +sudo -u talk2me /opt/talk2me/venv/bin/pip install -r requirements-prod.txt + +# Restart service +sudo systemctl restart talk2me +``` + +## Rollback + +If deployment fails: + +```bash +# Stop service +sudo systemctl stop talk2me + +# Restore backup +tar -xzf talk2me-backup.tar.gz -C / + +# Restart service +sudo systemctl start talk2me +``` \ No newline at end of file diff --git a/README.md b/README.md index b71b44d..d13507c 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,22 @@ Comprehensive memory leak prevention for extended use: See [MEMORY_MANAGEMENT.md](MEMORY_MANAGEMENT.md) for detailed documentation. +## Production Deployment + +For production use, deploy with a proper WSGI server: +- Gunicorn with optimized worker configuration +- Nginx reverse proxy with caching +- Docker/Docker Compose support +- Systemd service management +- Comprehensive security hardening + +Quick start: +```bash +docker-compose up -d +``` + +See [PRODUCTION_DEPLOYMENT.md](PRODUCTION_DEPLOYMENT.md) for detailed deployment instructions. + ## Mobile Support The interface is fully responsive and designed to work well on mobile devices. 
diff --git a/app.py b/app.py
index c5f725f..78c1e2b 100644
--- a/app.py
+++ b/app.py
@@ -1232,6 +1232,50 @@ def liveness_check():
     """Liveness probe - basic check to see if process is alive"""
     return jsonify({'status': 'alive', 'timestamp': time.time()})
 
+@app.route('/metrics', methods=['GET'])
+def prometheus_metrics():
+    """Prometheus-compatible metrics endpoint.
+
+    Falls back to a hand-written text exposition without prometheus_client.
+    """
+    try:
+        from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest
+        # Collectors self-register in the default registry; creating them again on
+        # the next scrape raises ValueError, so build them once and cache them.
+        collectors = getattr(prometheus_metrics, '_collectors', None)
+        if collectors is None:
+            collectors = prometheus_metrics._collectors = {
+                'requests': Counter('talk2me_requests_total', 'Total requests', ['method', 'endpoint']),
+                'duration': Histogram('talk2me_request_duration_seconds', 'Request duration', ['method', 'endpoint']),
+                'sessions': Gauge('talk2me_active_sessions', 'Active sessions'),
+                'memory': Gauge('talk2me_memory_usage_bytes', 'Memory usage', ['type']),
+            }
+
+        # Refresh gauges from live application state before rendering
+        if hasattr(app, 'session_manager'):
+            collectors['sessions'].set(len(app.session_manager.sessions))
+        if hasattr(app, 'memory_manager'):
+            stats = app.memory_manager.get_memory_stats()
+            collectors['memory'].labels(type='process').set(stats.process_memory_mb * 1024 * 1024)
+            collectors['memory'].labels(type='gpu').set(stats.gpu_memory_mb * 1024 * 1024)
+
+        return generate_latest(), 200, {'Content-Type': CONTENT_TYPE_LATEST}
+    except ImportError:
+        # Prometheus client not installed, return basic metrics
+        metrics = ['# HELP talk2me_up Talk2Me service status',
+                   '# TYPE talk2me_up gauge',
+                   'talk2me_up 1']
+        if hasattr(app, 'request_count'):
+            metrics += ['# HELP talk2me_requests_total Total number of requests',
+                        '# TYPE talk2me_requests_total counter',
+                        f'talk2me_requests_total {app.request_count}']
+        if hasattr(app, 'session_manager'):
+            metrics += ['# HELP talk2me_active_sessions Number of active sessions',
+                        '# TYPE talk2me_active_sessions gauge',
+                        f'talk2me_active_sessions {len(app.session_manager.sessions)}']
+        # The text exposition format requires the final line to end with '\n'
+        return '\n'.join(metrics) + '\n', 200, {'Content-Type': 'text/plain; charset=utf-8'}
+
 @app.route('/health/storage', methods=['GET'])
 def storage_health():
     """Check temporary file storage health"""
diff --git a/deploy.sh b/deploy.sh new file mode 100755 index 0000000..efca923 --- /dev/null +++ b/deploy.sh @@ -0,0 +1,208 @@ +#!/bin/bash +# Production deployment script for Talk2Me + +set -e # Exit on error + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Configuration +APP_NAME="talk2me" +APP_USER="talk2me" +APP_DIR="/opt/talk2me" +VENV_DIR="$APP_DIR/venv" +LOG_DIR="/var/log/talk2me" +PID_FILE="/var/run/talk2me.pid" +WORKERS=${WORKERS:-4} + +# Functions +print_status() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +# Check if running as root +if [[ $EUID -ne 0 ]]; then + print_error "This script must be run as root" + exit 1 +fi + +# Create application user if doesn't exist +if ! 
id "$APP_USER" &>/dev/null; then + print_status "Creating application user: $APP_USER" + useradd -m -s /bin/bash $APP_USER +fi + +# Create directories +print_status "Creating application directories" +mkdir -p $APP_DIR $LOG_DIR +chown -R $APP_USER:$APP_USER $APP_DIR $LOG_DIR + +# Copy application files +print_status "Copying application files" +rsync -av --exclude='venv' --exclude='__pycache__' --exclude='*.pyc' \ + --exclude='logs' --exclude='.git' --exclude='node_modules' \ + ./ $APP_DIR/ + +# Create virtual environment +print_status "Setting up Python virtual environment" +su - $APP_USER -c "cd $APP_DIR && python3 -m venv $VENV_DIR" + +# Install dependencies +print_status "Installing Python dependencies" +su - $APP_USER -c "cd $APP_DIR && $VENV_DIR/bin/pip install --upgrade pip" +su - $APP_USER -c "cd $APP_DIR && $VENV_DIR/bin/pip install -r requirements-prod.txt" + +# Install Whisper model +print_status "Downloading Whisper model (this may take a while)" +su - $APP_USER -c "cd $APP_DIR && $VENV_DIR/bin/python -c 'import whisper; whisper.load_model(\"base\")'" + +# Build frontend assets +if [ -f "package.json" ]; then + print_status "Building frontend assets" + cd $APP_DIR + npm install + npm run build +fi + +# Create systemd service +print_status "Creating systemd service" +cat > /etc/systemd/system/talk2me.service < /etc/nginx/sites-available/talk2me <