This adds a complete production deployment setup using Gunicorn as the WSGI server, replacing Flask's development server.

Key components:
- Gunicorn configuration with optimized worker settings
- Support for sync, threaded, and async (gevent) workers
- Automatic worker recycling to prevent memory leaks
- Increased timeouts for audio processing
- Production-ready logging and monitoring

Deployment options:
1. Docker/Docker Compose for containerized deployment
2. Systemd service for traditional deployment
3. Nginx reverse proxy configuration
4. SSL/TLS support

Production features:
- wsgi.py entry point for WSGI servers
- gunicorn_config.py with production settings
- Dockerfile with multi-stage build
- docker-compose.yml with full stack (Redis, PostgreSQL)
- nginx.conf with caching and security headers
- systemd service with security hardening
- deploy.sh automated deployment script

Configuration:
- .env.production template with all settings
- Support for environment-based configuration
- Separate requirements-prod.txt
- Prometheus metrics endpoint (/metrics)

Monitoring:
- Health check endpoints for liveness/readiness
- Prometheus-compatible metrics
- Structured logging
- Memory usage tracking
- Request counting

Security:
- Non-root user in Docker
- Systemd security restrictions
- Nginx security headers
- File permission hardening
- Resource limits

Documentation:
- Comprehensive PRODUCTION_DEPLOYMENT.md
- Scaling strategies
- Performance tuning guide
- Troubleshooting section

Also fixed memory_manager.py GC stats collection error.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
86 lines
2.5 KiB
Python
86 lines
2.5 KiB
Python
"""
Gunicorn configuration for production deployment
"""
|
|
import multiprocessing
|
|
import os
|
|
|
|
# Server socket
# The listen address is taken from GUNICORN_BIND and otherwise falls back to
# all interfaces on port 5005.
bind = os.getenv('GUNICORN_BIND', '0.0.0.0:5005')
# Maximum number of pending connections (Gunicorn's `backlog` setting).
backlog = 2048
|
|
|
|
# Worker processes
# The default is (2 x CPU cores) + 1; set GUNICORN_WORKERS to override it.
# An empty GUNICORN_WORKERS (for example an unset variable passed through by a
# container runtime) is treated as "not set" rather than crashing on int('').
workers = int(os.environ.get('GUNICORN_WORKERS') or multiprocessing.cpu_count() * 2 + 1)
worker_class = 'sync'  # Use 'gevent' for async if needed
# Only honoured by the async worker classes (eventlet/gevent); it has no
# effect while worker_class is 'sync'.
worker_connections = 1000
timeout = 120  # Increased for audio processing
keepalive = 5

# Restart workers after this many requests, to help prevent memory leaks
max_requests = 1000
# Random per-worker offset added to max_requests so that the workers do not
# all restart at the same moment.
max_requests_jitter = 50
|
|
|
|
# Preload the application
preload_app = True

# Server mechanics
daemon = False
# The PID file location can be overridden through GUNICORN_PID.
pidfile = os.getenv('GUNICORN_PID', '/tmp/talk2me.pid')
# Deliberately left unset: no user/group switch and no upload temp directory
# are configured in this file.
user = group = tmp_upload_dir = None
|
|
|
|
# Logging
# Both log targets default to '-' and can be redirected through the
# environment; the log level defaults to 'info'.
accesslog = os.getenv('GUNICORN_ACCESS_LOG', '-')
errorlog = os.getenv('GUNICORN_ERROR_LOG', '-')
loglevel = os.getenv('GUNICORN_LOG_LEVEL', 'info')
# Combined-log-style line with %(D)s (request time in microseconds) appended.
access_log_format = (
    '%(h)s %(l)s %(u)s %(t)s '
    '"%(r)s" %(s)s %(b)s '
    '"%(f)s" "%(a)s" %(D)s'
)

# Process naming
proc_name = 'talk2me'
|
|
|
|
# Server hooks
|
|
def when_ready(server):
    """Server hook invoked right after the server has started.

    Writes a single info-level line to the server's logger.
    """
    log = server.log
    log.info("Server is ready. Spawning workers")
|
|
|
|
def worker_int(worker):
    """Server hook invoked after a worker exited on SIGINT or SIGQUIT.

    Writes a single info-level line to the worker's logger.
    """
    message = "Worker received INT or QUIT signal"
    worker.log.info(message)
|
|
|
|
def pre_fork(server, worker):
    """Server hook invoked just before a worker is forked.

    Logs the worker object (its string form) at info level on the server's
    logger.
    """
    # Lazy %-style arguments render the same text as the f-string form.
    server.log.info("Forking worker %s", worker)
|
|
|
|
def post_fork(server, worker):
    """Server hook invoked just after a worker has been forked.

    Logs the new worker's pid at info level on the server's logger.
    """
    pid = worker.pid
    server.log.info("Worker spawned (pid: %s)", pid)
|
|
|
|
def worker_exit(server, worker):
    """Server hook invoked just after a worker has exited.

    Logs the departed worker's pid at info level on the server's logger.
    """
    pid = worker.pid
    server.log.info("Worker exit (pid: %s)", pid)
|
|
|
|
def pre_request(worker, req):
    """Called just before a worker processes the request.

    Logs "<METHOD> <path>" at debug level on the worker's logger. Requests
    for the health-check endpoints ('/health' and '/health/live') are
    deliberately not logged.

    NOTE: this module defines ``pre_request`` a second time further down
    (under "Silent health checks in logs"). Python keeps the later binding,
    so that definition is the one in effect. This one behaves the same way,
    so the result does not depend on which definition survives.

    Args:
        worker: object exposing a ``log`` logger with a ``debug`` method.
        req: request object exposing ``method`` and ``path``.
    """
    if req.path in ('/health', '/health/live'):
        # Don't log health checks
        return
    worker.log.debug("%s %s", req.method, req.path)
|
|
|
|
def post_request(worker, req, environ, resp):
    """Called after a worker processes the request.

    Logs "<METHOD> <path> - <status>" at debug level on the worker's logger.
    Requests for the health-check endpoints ('/health' and '/health/live')
    are skipped, matching the pre_request hook in this module, so that
    health checks stay out of the log on both sides of the request.

    Args:
        worker: object exposing a ``log`` logger with a ``debug`` method.
        req: request object exposing ``method`` and ``path``.
        environ: WSGI environ for the request (unused here).
        resp: response object exposing ``status``.
    """
    if req.path in ('/health', '/health/live'):
        # Don't log health checks
        return
    worker.log.debug("%s %s - %s", req.method, req.path, resp.status)
|
|
|
|
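# SSL/TLS (uncomment if Gunicorn itself terminates HTTPS)
# keyfile = '/path/to/keyfile'
# certfile = '/path/to/certfile'
# ssl_version = 'TLSv1_2'  # deprecated in newer Gunicorn releases (use the ssl_context hook there)
# cert_reqs = 2  # must be an integer: 2 == ssl.CERT_REQUIRED; only needed to require client certificates
# ca_certs = '/path/to/ca_certs'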
|
|
|
|
# Thread option (if using threaded workers)
# Read from GUNICORN_THREADS, defaulting to 1. An empty value is treated the
# same as an unset one instead of crashing on int('').
threads = int(os.environ.get('GUNICORN_THREADS') or 1)
|
|
|
|
# Silent health checks in logs
|
|
def pre_request(worker, req):
    """Log the incoming request at debug level, skipping health checks.

    Requests whose path is '/health' or '/health/live' produce no log line;
    every other request is logged as "<METHOD> <path>" on the worker's
    logger.
    """
    is_health_check = req.path in ('/health', '/health/live')
    if not is_health_check:
        worker.log.debug("%s %s", req.method, req.path)