Add production WSGI server - Flask dev server unsuitable for production load

This adds a complete production deployment setup using Gunicorn as the WSGI server, replacing Flask's development server.

Key components:
- Gunicorn configuration with optimized worker settings (sketched after this list)
- Support for sync, threaded, and async (gevent) workers
- Automatic worker recycling to prevent memory leaks
- Increased timeouts for audio processing
- Production-ready logging and monitoring
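
For reference, the worker settings above might look roughly like the gunicorn_config.py sketch below; the specific values (worker counts, timeouts, recycling thresholds) are illustrative assumptions rather than the exact shipped configuration:

    # gunicorn_config.py (illustrative sketch, not the exact shipped values)
    import multiprocessing

    bind = "0.0.0.0:8000"
    workers = multiprocessing.cpu_count() * 2 + 1   # sync workers sized to the CPU count
    worker_class = "gevent"                         # or "sync" / "gthread" depending on workload
    threads = 4                                     # only used by the gthread worker class
    max_requests = 1000                             # recycle workers to contain memory leaks
    max_requests_jitter = 50                        # stagger recycling across workers
    timeout = 300                                   # generous timeout for audio processing
    graceful_timeout = 30
    accesslog = "-"                                 # log to stdout for container-friendly logging
    errorlog = "-"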

Deployment options:
1. Docker/Docker Compose for containerized deployment
2. Systemd service for traditional deployment
3. Nginx reverse proxy configuration
4. SSL/TLS support

Production features:
- wsgi.py entry point for WSGI servers (see the sketch after this list)
- gunicorn_config.py with production settings
- Dockerfile with multi-stage build
- docker-compose.yml with full stack (Redis, PostgreSQL)
- nginx.conf with caching and security headers
- systemd service with security hardening
- deploy.sh automated deployment script
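
The wsgi.py entry point is likely just a thin wrapper around the Flask app; a minimal sketch, assuming app.py exposes a Flask instance named app:

    # wsgi.py (minimal sketch; assumes app.py exposes a Flask instance named "app")
    from app import app as application  # Gunicorn resolves "wsgi:application" by default

    if __name__ == "__main__":
        # Local debugging fallback only; in production run:
        #   gunicorn -c gunicorn_config.py wsgi:application
        application.run(host="127.0.0.1", port=8000)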

Configuration:
- .env.production template with all settings
- Support for environment-based configuration (sketched after this list)
- Separate requirements-prod.txt
- Prometheus metrics endpoint (/metrics)
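
Environment-based configuration can be as simple as reading settings from os.environ at startup; the sketch below is an assumption about the shape of that code, and the variable names are illustrative (the real keys live in the .env.production template):

    # config sketch; variable names are illustrative, not the actual .env.production keys
    import os

    class ProductionConfig:
        DEBUG = False
        SECRET_KEY = os.environ["SECRET_KEY"]  # fail fast if the secret is unset
        REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
        DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://localhost/talk2me")
        UPLOAD_FOLDER = os.environ.get("UPLOAD_FOLDER", "/tmp/talk2me_uploads")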

Monitoring:
- Health check endpoints for liveness/readiness
- Prometheus-compatible metrics
- Structured logging (see the sketch after this list)
- Memory usage tracking
- Request counting
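
Structured logging here means emitting one JSON object per log line so aggregators can parse fields without regexes; a minimal sketch (the field names are illustrative, not necessarily what the app emits):

    # JSON log formatter sketch; field names are illustrative
    import json
    import logging

    class JsonFormatter(logging.Formatter):
        def format(self, record):
            return json.dumps({
                "time": self.formatTime(record),
                "level": record.levelname,
                "logger": record.name,
                "message": record.getMessage(),
            })

    handler = logging.StreamHandler()
    handler.setFormatter(JsonFormatter())
    logging.getLogger().addHandler(handler)
    logging.getLogger().setLevel(logging.INFO)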

Security:
- Non-root user in Docker
- Systemd security restrictions
- Nginx security headers
- File permission hardening
- Resource limits

Documentation:
- Comprehensive PRODUCTION_DEPLOYMENT.md
- Scaling strategies
- Performance tuning guide
- Troubleshooting section

Also fixes a GC stats collection error in memory_manager.py.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-03 08:49:32 -06:00
parent 1b9ad03400
commit 92fd390866
13 changed files with 1237 additions and 2 deletions

app.py (44 changed lines)

@@ -1232,6 +1232,50 @@ def liveness_check():
"""Liveness probe - basic check to see if process is alive"""
return jsonify({'status': 'alive', 'timestamp': time.time()})

@app.route('/metrics', methods=['GET'])
def prometheus_metrics():
    """Prometheus-compatible metrics endpoint"""
    try:
        # Import prometheus client if available
        from prometheus_client import generate_latest, Counter, Histogram, Gauge

        # Define metrics once per process and cache them on the app object;
        # re-registering the same metric names on every request would raise a
        # duplicated-timeseries error from the default registry
        if not hasattr(app, 'prometheus_collectors'):
            app.prometheus_collectors = {
                'request_count': Counter('talk2me_requests_total', 'Total requests',
                                         ['method', 'endpoint']),
                'request_duration': Histogram('talk2me_request_duration_seconds',
                                              'Request duration', ['method', 'endpoint']),
                'active_sessions': Gauge('talk2me_active_sessions', 'Active sessions'),
                'memory_usage': Gauge('talk2me_memory_usage_bytes', 'Memory usage', ['type']),
            }
        collectors = app.prometheus_collectors

        # Update metrics from the live session and memory managers
        if hasattr(app, 'session_manager'):
            collectors['active_sessions'].set(len(app.session_manager.sessions))
        if hasattr(app, 'memory_manager'):
            stats = app.memory_manager.get_memory_stats()
            collectors['memory_usage'].labels(type='process').set(stats.process_memory_mb * 1024 * 1024)
            collectors['memory_usage'].labels(type='gpu').set(stats.gpu_memory_mb * 1024 * 1024)

        return generate_latest()
    except ImportError:
        # Prometheus client not installed, return basic metrics in Prometheus text format
        metrics = []
        metrics.append('# HELP talk2me_up Talk2Me service status')
        metrics.append('# TYPE talk2me_up gauge')
        metrics.append('talk2me_up 1')
        if hasattr(app, 'request_count'):
            metrics.append('# HELP talk2me_requests_total Total number of requests')
            metrics.append('# TYPE talk2me_requests_total counter')
            metrics.append(f'talk2me_requests_total {app.request_count}')
        if hasattr(app, 'session_manager'):
            metrics.append('# HELP talk2me_active_sessions Number of active sessions')
            metrics.append('# TYPE talk2me_active_sessions gauge')
            metrics.append(f'talk2me_active_sessions {len(app.session_manager.sessions)}')
        return '\n'.join(metrics), 200, {'Content-Type': 'text/plain; charset=utf-8'}

@app.route('/health/storage', methods=['GET'])
def storage_health():
    """Check temporary file storage health"""