This adds a complete production deployment setup using Gunicorn as the WSGI server, replacing Flask's development server.

Key components:
- Gunicorn configuration with optimized worker settings
- Support for sync, threaded, and async (gevent) workers
- Automatic worker recycling to prevent memory leaks
- Increased timeouts for audio processing
- Production-ready logging and monitoring

Deployment options:
1. Docker/Docker Compose for containerized deployment
2. Systemd service for traditional deployment
3. Nginx reverse proxy configuration
4. SSL/TLS support

Production features:
- wsgi.py entry point for WSGI servers
- gunicorn_config.py with production settings
- Dockerfile with multi-stage build
- docker-compose.yml with full stack (Redis, PostgreSQL)
- nginx.conf with caching and security headers
- systemd service with security hardening
- deploy.sh automated deployment script

Configuration:
- .env.production template with all settings
- Support for environment-based configuration
- Separate requirements-prod.txt
- Prometheus metrics endpoint (/metrics)

Monitoring:
- Health check endpoints for liveness/readiness
- Prometheus-compatible metrics
- Structured logging
- Memory usage tracking
- Request counting

Security:
- Non-root user in Docker
- Systemd security restrictions
- Nginx security headers
- File permission hardening
- Resource limits

Documentation:
- Comprehensive PRODUCTION_DEPLOYMENT.md
- Scaling strategies
- Performance tuning guide
- Troubleshooting section

Also fixed memory_manager.py GC stats collection error.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
66 lines
1.4 KiB
Desktop File
66 lines
1.4 KiB
Desktop File
[Unit]
# Unit metadata and start-up ordering for the Talk2Me Gunicorn service.
Description=Talk2Me Real-time Translation Service
# NOTE(review): "your-repo" looks like a placeholder — replace with the real
# repository URL before deploying.
Documentation=https://github.com/your-repo/talk2me
# Order this unit after network.target.
After=network.target

[Service]
# systemd considers the service started only once the main process sends a
# readiness notification (sd_notify).
Type=notify
User=talk2me
Group=talk2me
WorkingDirectory=/opt/talk2me
# Virtualenv first, followed by the standard system directories so that
# external binaries spawned by the application can still be resolved.
Environment="PATH=/opt/talk2me/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
Environment="FLASK_ENV=production"
Environment="PYTHONUNBUFFERED=1"

# Production environment variables.
# The leading "-" makes a missing file non-fatal.
EnvironmentFile=-/opt/talk2me/.env

# Gunicorn command with production settings.
# Each continued line must end in a backslash; nothing may sit between them.
ExecStart=/opt/talk2me/venv/bin/gunicorn \
    --config /opt/talk2me/gunicorn_config.py \
    --error-logfile /var/log/talk2me/gunicorn-error.log \
    --access-logfile /var/log/talk2me/gunicorn-access.log \
    --log-level info \
    wsgi:application

# Reload via SIGHUP
ExecReload=/bin/kill -s HUP $MAINPID

# Graceful stop: SIGTERM goes to the main process; anything still running
# after TimeoutStopSec (seconds) is sent SIGKILL.
KillMode=mixed
TimeoutStopSec=30

# Restart policy: always restart, waiting RestartSec seconds between attempts.
Restart=always
RestartSec=10
# Give up after 3 starts within 60 seconds.
# NOTE: these are the legacy [Service] spellings, still accepted by systemd;
# current documentation lists StartLimitIntervalSec=/StartLimitBurst= under [Unit].
StartLimitBurst=3
StartLimitInterval=60

# Security settings
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictSUIDSGID=true
LockPersonality=true

# Allow writing to specific directories (everything else is read-only under
# ProtectSystem=strict). The "-" prefix keeps start-up from failing when the
# upload directory does not exist yet.
# NOTE(review): with PrivateTmp=true the service already gets its own writable
# /tmp — confirm the upload path is still needed here.
ReadWritePaths=/var/log/talk2me -/tmp/talk2me_uploads

# Resource limits
LimitNOFILE=65536
LimitNPROC=4096

# Memory limits (adjust based on your system)
# MemoryMax= is the hard cap and replaces the deprecated MemoryLimit=;
# MemoryHigh= is the throttling threshold below it.
MemoryMax=4G
MemoryHigh=3G

# CPU limits (optional)
# CPUQuota=200%

[Install]
# When enabled, the service is pulled in by multi-user.target.
WantedBy=multi-user.target