diff --git a/ERROR_LOGGING.md b/ERROR_LOGGING.md new file mode 100644 index 0000000..f77cd2a --- /dev/null +++ b/ERROR_LOGGING.md @@ -0,0 +1,460 @@ +# Error Logging Documentation + +This document describes the comprehensive error logging system implemented in Talk2Me for debugging production issues. + +## Overview + +Talk2Me implements a structured logging system that provides: +- JSON-formatted structured logs for easy parsing +- Multiple log streams (app, errors, access, security, performance) +- Automatic log rotation to prevent disk space issues +- Request tracing with unique IDs +- Performance metrics collection +- Security event tracking +- Error deduplication and frequency tracking + +## Log Types + +### 1. Application Logs (`logs/talk2me.log`) +General application logs including info, warnings, and debug messages. + +```json +{ + "timestamp": "2024-01-15T10:30:45.123Z", + "level": "INFO", + "logger": "talk2me", + "message": "Whisper model loaded successfully", + "app": "talk2me", + "environment": "production", + "hostname": "server-1", + "thread": "MainThread", + "process": 12345 +} +``` + +### 2. Error Logs (`logs/errors.log`) +Dedicated error logging with full exception details and stack traces. + +```json +{ + "timestamp": "2024-01-15T10:31:00.456Z", + "level": "ERROR", + "logger": "talk2me.errors", + "message": "Error in transcribe: File too large", + "exception": { + "type": "ValueError", + "message": "Audio file exceeds maximum size", + "traceback": ["...full stack trace..."] + }, + "request_id": "1234567890-abcdef", + "endpoint": "transcribe", + "method": "POST", + "path": "/transcribe", + "ip": "192.168.1.100" +} +``` + +### 3. Access Logs (`logs/access.log`) +HTTP request/response logging for traffic analysis. + +```json +{ + "timestamp": "2024-01-15T10:32:00.789Z", + "level": "INFO", + "message": "request_complete", + "request_id": "1234567890-abcdef", + "method": "POST", + "path": "/transcribe", + "status": 200, + "duration_ms": 1250, + "content_length": 4096, + "ip": "192.168.1.100", + "user_agent": "Mozilla/5.0..." +} +``` + +### 4. Security Logs (`logs/security.log`) +Security-related events and suspicious activities. + +```json +{ + "timestamp": "2024-01-15T10:33:00.123Z", + "level": "WARNING", + "message": "Security event: rate_limit_exceeded", + "event": "rate_limit_exceeded", + "severity": "warning", + "ip": "192.168.1.100", + "endpoint": "/transcribe", + "attempts": 15, + "blocked": true +} +``` + +### 5. Performance Logs (`logs/performance.log`) +Performance metrics and slow request tracking. + +```json +{ + "timestamp": "2024-01-15T10:34:00.456Z", + "level": "INFO", + "message": "Performance metric: transcribe_audio", + "metric": "transcribe_audio", + "duration_ms": 2500, + "function": "transcribe", + "module": "app", + "request_id": "1234567890-abcdef" +} +``` + +## Configuration + +### Environment Variables + +```bash +# Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) +export LOG_LEVEL=INFO + +# Log file paths +export LOG_FILE=logs/talk2me.log +export ERROR_LOG_FILE=logs/errors.log + +# Log rotation settings +export LOG_MAX_BYTES=52428800 # 50MB +export LOG_BACKUP_COUNT=10 # Keep 10 backup files + +# Environment +export FLASK_ENV=production +``` + +### Flask Configuration + +```python +app.config.update({ + 'LOG_LEVEL': 'INFO', + 'LOG_FILE': 'logs/talk2me.log', + 'ERROR_LOG_FILE': 'logs/errors.log', + 'LOG_MAX_BYTES': 50 * 1024 * 1024, + 'LOG_BACKUP_COUNT': 10 +}) +``` + +## Admin API Endpoints + +### GET /admin/logs/errors +View recent error logs and error frequency statistics. + +```bash +curl -H "X-Admin-Token: your-token" http://localhost:5005/admin/logs/errors +``` + +Response: +```json +{ + "error_summary": { + "abc123def456": { + "count_last_hour": 5, + "last_seen": 1705320000 + } + }, + "recent_errors": [...], + "total_errors_logged": 150 +} +``` + +### GET /admin/logs/performance +View performance metrics and slow requests. + +```bash +curl -H "X-Admin-Token: your-token" http://localhost:5005/admin/logs/performance +``` + +Response: +```json +{ + "performance_metrics": { + "transcribe_audio": { + "avg_ms": 850.5, + "max_ms": 3200, + "min_ms": 125, + "count": 1024 + } + }, + "slow_requests": [ + { + "metric": "transcribe_audio", + "duration_ms": 3200, + "timestamp": "2024-01-15T10:35:00Z" + } + ] +} +``` + +### GET /admin/logs/security +View security events and suspicious activities. + +```bash +curl -H "X-Admin-Token: your-token" http://localhost:5005/admin/logs/security +``` + +Response: +```json +{ + "security_events": [...], + "event_summary": { + "rate_limit_exceeded": 25, + "suspicious_error": 3, + "high_error_rate": 1 + }, + "total_events": 29 +} +``` + +## Usage Patterns + +### 1. Logging Errors with Context + +```python +from error_logger import log_exception + +try: + # Some operation + process_audio(file) +except Exception as e: + log_exception( + e, + message="Failed to process audio", + user_id=user.id, + file_size=file.size, + file_type=file.content_type + ) +``` + +### 2. Performance Monitoring + +```python +from error_logger import log_performance + +@log_performance('expensive_operation') +def process_large_file(file): + # This will automatically log execution time + return processed_data +``` + +### 3. Security Event Logging + +```python +app.error_logger.log_security( + 'unauthorized_access', + severity='warning', + ip=request.remote_addr, + attempted_resource='/admin', + user_agent=request.headers.get('User-Agent') +) +``` + +### 4. Request Context + +```python +from error_logger import log_context + +with log_context(user_id=user.id, feature='translation'): + # All logs within this context will include user_id and feature + translate_text(text) +``` + +## Log Analysis + +### Finding Specific Errors + +```bash +# Find all authentication errors +grep '"error_type":"AuthenticationError"' logs/errors.log | jq . + +# Find errors from specific IP +grep '"ip":"192.168.1.100"' logs/errors.log | jq . + +# Find errors in last hour +grep "$(date -u -d '1 hour ago' +%Y-%m-%dT%H)" logs/errors.log | jq . +``` + +### Performance Analysis + +```bash +# Find slow requests (>2000ms) +jq 'select(.extra_fields.duration_ms > 2000)' logs/performance.log + +# Calculate average response time for endpoint +jq 'select(.extra_fields.metric == "transcribe_audio") | .extra_fields.duration_ms' logs/performance.log | awk '{sum+=$1; count++} END {print sum/count}' +``` + +### Security Monitoring + +```bash +# Count security events by type +jq '.extra_fields.event' logs/security.log | sort | uniq -c + +# Find all blocked IPs +jq 'select(.extra_fields.blocked == true) | .extra_fields.ip' logs/security.log | sort -u +``` + +## Log Rotation + +Logs are automatically rotated based on size or time: + +- **Application/Error logs**: Rotate at 50MB, keep 10 backups +- **Access logs**: Daily rotation, keep 30 days +- **Performance logs**: Hourly rotation, keep 7 days +- **Security logs**: Rotate at 50MB, keep 10 backups + +Rotated logs are named with numeric suffixes: +- `talk2me.log` (current) +- `talk2me.log.1` (most recent backup) +- `talk2me.log.2` (older backup) +- etc. + +## Best Practices + +### 1. Structured Logging + +Always include relevant context: +```python +logger.info("User action completed", extra={ + 'extra_fields': { + 'user_id': user.id, + 'action': 'upload_audio', + 'file_size': file.size, + 'duration_ms': processing_time + } +}) +``` + +### 2. Error Handling + +Log errors at appropriate levels: +```python +try: + result = risky_operation() +except ValidationError as e: + logger.warning(f"Validation failed: {e}") # Expected errors +except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) # Unexpected errors +``` + +### 3. Performance Tracking + +Track key operations: +```python +start = time.time() +result = expensive_operation() +duration = (time.time() - start) * 1000 + +app.error_logger.log_performance( + 'expensive_operation', + value=duration, + input_size=len(data), + output_size=len(result) +) +``` + +### 4. Security Awareness + +Log security-relevant events: +```python +if failed_attempts > 3: + app.error_logger.log_security( + 'multiple_failed_attempts', + severity='warning', + ip=request.remote_addr, + attempts=failed_attempts + ) +``` + +## Monitoring Integration + +### Prometheus Metrics + +Export log metrics for Prometheus: +```python +@app.route('/metrics') +def prometheus_metrics(): + error_summary = app.error_logger.get_error_summary() + # Format as Prometheus metrics + return format_prometheus_metrics(error_summary) +``` + +### ELK Stack + +Ship logs to Elasticsearch: +```yaml +filebeat.inputs: +- type: log + paths: + - /app/logs/*.log + json.keys_under_root: true + json.add_error_key: true +``` + +### CloudWatch + +For AWS deployments: +```python +# Install boto3 and watchtower +import watchtower +cloudwatch_handler = watchtower.CloudWatchLogHandler() +logger.addHandler(cloudwatch_handler) +``` + +## Troubleshooting + +### Common Issues + +#### 1. Logs Not Being Written + +Check permissions: +```bash +ls -la logs/ +# Should show write permissions for app user +``` + +Create logs directory: +```bash +mkdir -p logs +chmod 755 logs +``` + +#### 2. Disk Space Issues + +Monitor log sizes: +```bash +du -sh logs/* +``` + +Force rotation: +```bash +# Manually rotate logs +mv logs/talk2me.log logs/talk2me.log.backup +# App will create new log file +``` + +#### 3. Performance Impact + +If logging impacts performance: +- Increase LOG_LEVEL to WARNING or ERROR +- Reduce backup count +- Use asynchronous logging (future enhancement) + +## Security Considerations + +1. **Log Sanitization**: Sensitive data is automatically masked +2. **Access Control**: Admin endpoints require authentication +3. **Log Retention**: Old logs are automatically deleted +4. **Encryption**: Consider encrypting logs at rest in production +5. **Audit Trail**: All log access is itself logged + +## Future Enhancements + +1. **Centralized Logging**: Ship logs to centralized service +2. **Real-time Alerts**: Trigger alerts on error patterns +3. **Log Analytics**: Built-in log analysis dashboard +4. **Correlation IDs**: Track requests across microservices +5. **Async Logging**: Reduce performance impact \ No newline at end of file diff --git a/README.md b/README.md index c29ef60..0d19076 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,18 @@ Comprehensive request size limiting prevents memory exhaustion: See [REQUEST_SIZE_LIMITS.md](REQUEST_SIZE_LIMITS.md) for detailed documentation. +## Error Logging + +Production-ready error logging system for debugging and monitoring: +- Structured JSON logs for easy parsing +- Multiple log streams (app, errors, access, security, performance) +- Automatic log rotation to prevent disk exhaustion +- Request tracing with unique IDs +- Performance metrics and slow request tracking +- Admin endpoints for log analysis + +See [ERROR_LOGGING.md](ERROR_LOGGING.md) for detailed documentation. + ## Mobile Support The interface is fully responsive and designed to work well on mobile devices. diff --git a/app.py b/app.py index 4664ef0..c013600 100644 --- a/app.py +++ b/app.py @@ -37,6 +37,7 @@ from config import init_app as init_config from secrets_manager import init_app as init_secrets from session_manager import init_app as init_session_manager, track_resource from request_size_limiter import RequestSizeLimiter, limit_request_size +from error_logger import ErrorLogger, log_errors, log_performance, log_exception, get_logger # Error boundary decorator for Flask routes def with_error_boundary(func): @@ -45,16 +46,36 @@ def with_error_boundary(func): try: return func(*args, **kwargs) except Exception as e: - # Log the full exception with traceback - logger.error(f"Error in {func.__name__}: {str(e)}") - logger.error(traceback.format_exc()) + # Log the error with full context + log_exception( + e, + message=f"Error in {func.__name__}", + endpoint=request.endpoint, + method=request.method, + path=request.path, + ip=request.remote_addr, + function=func.__name__, + module=func.__module__ + ) + + # Log security event for suspicious errors + if any(keyword in str(e).lower() for keyword in ['inject', 'attack', 'malicious', 'unauthorized']): + app.error_logger.log_security( + 'suspicious_error', + severity='warning', + error_type=type(e).__name__, + error_message=str(e), + endpoint=request.endpoint, + ip=request.remote_addr + ) # Return appropriate error response error_message = str(e) if app.debug else "An internal error occurred" return jsonify({ 'success': False, 'error': error_message, - 'component': func.__name__ + 'component': func.__name__, + 'request_id': getattr(g, 'request_id', None) }), 500 return wrapper @@ -119,6 +140,18 @@ request_size_limiter = RequestSizeLimiter(app, { 'max_image_size': app.config.get('MAX_IMAGE_SIZE', 10 * 1024 * 1024), # 10MB for images }) +# Initialize error logging system +error_logger = ErrorLogger(app, { + 'log_level': app.config.get('LOG_LEVEL', 'INFO'), + 'log_file': app.config.get('LOG_FILE', 'logs/talk2me.log'), + 'error_log_file': app.config.get('ERROR_LOG_FILE', 'logs/errors.log'), + 'max_bytes': app.config.get('LOG_MAX_BYTES', 50 * 1024 * 1024), # 50MB + 'backup_count': app.config.get('LOG_BACKUP_COUNT', 10) +}) + +# Update logger to use the new system +logger = get_logger(__name__) + # TTS configuration is already loaded from config.py # Warn if TTS API key is not set if not app.config.get('TTS_API_KEY'): @@ -604,6 +637,7 @@ def index(): @limit_request_size(max_audio_size=25 * 1024 * 1024) # 25MB limit for audio @with_error_boundary @track_resource('audio_file') +@log_performance('transcribe_audio') def transcribe(): if 'audio' not in request.files: return jsonify({'error': 'No audio file provided'}), 400 @@ -719,6 +753,7 @@ def transcribe(): @rate_limit(requests_per_minute=20, requests_per_hour=300, check_size=True) @limit_request_size(max_size=1 * 1024 * 1024) # 1MB limit for JSON @with_error_boundary +@log_performance('translate_text') def translate(): try: # Validate request size @@ -1589,5 +1624,178 @@ def update_size_limits(): logger.error(f"Failed to update size limits: {str(e)}") return jsonify({'error': str(e)}), 500 +@app.route('/admin/logs/errors', methods=['GET']) +@rate_limit(requests_per_minute=10) +def get_error_logs(): + """Get recent error log entries""" + try: + # Simple authentication check + auth_token = request.headers.get('X-Admin-Token') + expected_token = app.config.get('ADMIN_TOKEN', 'default-admin-token') + + if auth_token != expected_token: + return jsonify({'error': 'Unauthorized'}), 401 + + # Get error summary + if hasattr(app, 'error_logger'): + error_summary = app.error_logger.get_error_summary() + + # Read last 100 lines from error log + error_log_path = app.error_logger.error_log_file + recent_errors = [] + + if os.path.exists(error_log_path): + try: + with open(error_log_path, 'r') as f: + lines = f.readlines() + # Get last 100 lines + for line in lines[-100:]: + try: + error_entry = json.loads(line) + recent_errors.append(error_entry) + except json.JSONDecodeError: + pass + except Exception as e: + logger.error(f"Failed to read error log: {e}") + + return jsonify({ + 'error_summary': error_summary, + 'recent_errors': recent_errors[-50:], # Last 50 errors + 'total_errors_logged': len(recent_errors) + }) + else: + return jsonify({'error': 'Error logger not initialized'}), 500 + except Exception as e: + logger.error(f"Failed to get error logs: {str(e)}") + return jsonify({'error': str(e)}), 500 + +@app.route('/admin/logs/performance', methods=['GET']) +@rate_limit(requests_per_minute=10) +def get_performance_logs(): + """Get performance metrics""" + try: + # Simple authentication check + auth_token = request.headers.get('X-Admin-Token') + expected_token = app.config.get('ADMIN_TOKEN', 'default-admin-token') + + if auth_token != expected_token: + return jsonify({'error': 'Unauthorized'}), 401 + + # Read performance log + perf_log_path = 'logs/performance.log' + metrics = { + 'endpoints': {}, + 'slow_requests': [], + 'average_response_times': {} + } + + if os.path.exists(perf_log_path): + try: + with open(perf_log_path, 'r') as f: + for line in f.readlines()[-1000:]: # Last 1000 entries + try: + entry = json.loads(line) + if 'extra_fields' in entry: + metric = entry['extra_fields'].get('metric', '') + duration = entry['extra_fields'].get('duration_ms', 0) + + # Track endpoint metrics + if metric not in metrics['endpoints']: + metrics['endpoints'][metric] = { + 'count': 0, + 'total_duration': 0, + 'max_duration': 0, + 'min_duration': float('inf') + } + + metrics['endpoints'][metric]['count'] += 1 + metrics['endpoints'][metric]['total_duration'] += duration + metrics['endpoints'][metric]['max_duration'] = max( + metrics['endpoints'][metric]['max_duration'], duration + ) + metrics['endpoints'][metric]['min_duration'] = min( + metrics['endpoints'][metric]['min_duration'], duration + ) + + # Track slow requests + if duration > 1000: # Over 1 second + metrics['slow_requests'].append({ + 'metric': metric, + 'duration_ms': duration, + 'timestamp': entry.get('timestamp') + }) + except json.JSONDecodeError: + pass + except Exception as e: + logger.error(f"Failed to read performance log: {e}") + + # Calculate averages + for endpoint, data in metrics['endpoints'].items(): + if data['count'] > 0: + metrics['average_response_times'][endpoint] = { + 'avg_ms': data['total_duration'] / data['count'], + 'max_ms': data['max_duration'], + 'min_ms': data['min_duration'] if data['min_duration'] != float('inf') else 0, + 'count': data['count'] + } + + # Sort slow requests by duration + metrics['slow_requests'].sort(key=lambda x: x['duration_ms'], reverse=True) + metrics['slow_requests'] = metrics['slow_requests'][:20] # Top 20 slowest + + return jsonify({ + 'performance_metrics': metrics['average_response_times'], + 'slow_requests': metrics['slow_requests'] + }) + except Exception as e: + logger.error(f"Failed to get performance logs: {str(e)}") + return jsonify({'error': str(e)}), 500 + +@app.route('/admin/logs/security', methods=['GET']) +@rate_limit(requests_per_minute=10) +def get_security_logs(): + """Get security event logs""" + try: + # Simple authentication check + auth_token = request.headers.get('X-Admin-Token') + expected_token = app.config.get('ADMIN_TOKEN', 'default-admin-token') + + if auth_token != expected_token: + return jsonify({'error': 'Unauthorized'}), 401 + + # Read security log + security_log_path = 'logs/security.log' + security_events = [] + + if os.path.exists(security_log_path): + try: + with open(security_log_path, 'r') as f: + for line in f.readlines()[-200:]: # Last 200 entries + try: + event = json.loads(line) + security_events.append(event) + except json.JSONDecodeError: + pass + except Exception as e: + logger.error(f"Failed to read security log: {e}") + + # Group by event type + event_summary = {} + for event in security_events: + if 'extra_fields' in event: + event_type = event['extra_fields'].get('event', 'unknown') + if event_type not in event_summary: + event_summary[event_type] = 0 + event_summary[event_type] += 1 + + return jsonify({ + 'security_events': security_events[-50:], # Last 50 events + 'event_summary': event_summary, + 'total_events': len(security_events) + }) + except Exception as e: + logger.error(f"Failed to get security logs: {str(e)}") + return jsonify({'error': str(e)}), 500 + if __name__ == '__main__': app.run(host='0.0.0.0', port=5005, debug=True) diff --git a/error_logger.py b/error_logger.py new file mode 100644 index 0000000..cbad836 --- /dev/null +++ b/error_logger.py @@ -0,0 +1,564 @@ +# Comprehensive error logging system for production debugging +import logging +import logging.handlers +import os +import sys +import json +import traceback +import time +from datetime import datetime +from typing import Dict, Any, Optional, Union +from functools import wraps +import socket +import threading +from flask import request, g +from contextlib import contextmanager +import hashlib + +# Create logs directory if it doesn't exist +LOGS_DIR = os.environ.get('LOGS_DIR', 'logs') +os.makedirs(LOGS_DIR, exist_ok=True) + +class StructuredFormatter(logging.Formatter): + """ + Custom formatter that outputs structured JSON logs + """ + def __init__(self, app_name='talk2me', environment='development'): + super().__init__() + self.app_name = app_name + self.environment = environment + self.hostname = socket.gethostname() + + def format(self, record): + # Base log structure + log_data = { + 'timestamp': datetime.utcnow().isoformat() + 'Z', + 'level': record.levelname, + 'logger': record.name, + 'message': record.getMessage(), + 'app': self.app_name, + 'environment': self.environment, + 'hostname': self.hostname, + 'thread': threading.current_thread().name, + 'process': os.getpid() + } + + # Add exception info if present + if record.exc_info: + log_data['exception'] = { + 'type': record.exc_info[0].__name__, + 'message': str(record.exc_info[1]), + 'traceback': traceback.format_exception(*record.exc_info) + } + + # Add extra fields + if hasattr(record, 'extra_fields'): + log_data.update(record.extra_fields) + + # Add Flask request context if available + if hasattr(record, 'request_id'): + log_data['request_id'] = record.request_id + + if hasattr(record, 'user_id'): + log_data['user_id'] = record.user_id + + if hasattr(record, 'session_id'): + log_data['session_id'] = record.session_id + + # Add performance metrics if available + if hasattr(record, 'duration'): + log_data['duration_ms'] = record.duration + + if hasattr(record, 'memory_usage'): + log_data['memory_usage_mb'] = record.memory_usage + + return json.dumps(log_data, default=str) + +class ErrorLogger: + """ + Comprehensive error logging system with multiple handlers and features + """ + def __init__(self, app=None, config=None): + self.config = config or {} + self.loggers = {} + self.error_counts = {} + self.error_signatures = {} + + if app: + self.init_app(app) + + def init_app(self, app): + """Initialize error logging for Flask app""" + self.app = app + + # Get configuration + self.log_level = self.config.get('log_level', + app.config.get('LOG_LEVEL', 'INFO')) + self.log_file = self.config.get('log_file', + app.config.get('LOG_FILE', + os.path.join(LOGS_DIR, 'talk2me.log'))) + self.error_log_file = self.config.get('error_log_file', + os.path.join(LOGS_DIR, 'errors.log')) + self.max_bytes = self.config.get('max_bytes', 50 * 1024 * 1024) # 50MB + self.backup_count = self.config.get('backup_count', 10) + self.environment = app.config.get('FLASK_ENV', 'development') + + # Set up loggers + self._setup_app_logger() + self._setup_error_logger() + self._setup_access_logger() + self._setup_security_logger() + self._setup_performance_logger() + + # Add Flask error handlers + self._setup_flask_handlers(app) + + # Add request logging + app.before_request(self._before_request) + app.after_request(self._after_request) + + # Store logger in app + app.error_logger = self + + logging.info("Error logging system initialized") + + def _setup_app_logger(self): + """Set up main application logger""" + app_logger = logging.getLogger('talk2me') + app_logger.setLevel(getattr(logging, self.log_level.upper())) + + # Remove existing handlers + app_logger.handlers = [] + + # Console handler with color support + console_handler = logging.StreamHandler(sys.stdout) + if sys.stdout.isatty(): + # Use colored output for terminals + from colorlog import ColoredFormatter + console_formatter = ColoredFormatter( + '%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(message)s', + log_colors={ + 'DEBUG': 'cyan', + 'INFO': 'green', + 'WARNING': 'yellow', + 'ERROR': 'red', + 'CRITICAL': 'red,bg_white', + } + ) + console_handler.setFormatter(console_formatter) + else: + console_handler.setFormatter( + StructuredFormatter('talk2me', self.environment) + ) + app_logger.addHandler(console_handler) + + # Rotating file handler + file_handler = logging.handlers.RotatingFileHandler( + self.log_file, + maxBytes=self.max_bytes, + backupCount=self.backup_count + ) + file_handler.setFormatter( + StructuredFormatter('talk2me', self.environment) + ) + app_logger.addHandler(file_handler) + + self.loggers['app'] = app_logger + + def _setup_error_logger(self): + """Set up dedicated error logger""" + error_logger = logging.getLogger('talk2me.errors') + error_logger.setLevel(logging.ERROR) + + # Error file handler + error_handler = logging.handlers.RotatingFileHandler( + self.error_log_file, + maxBytes=self.max_bytes, + backupCount=self.backup_count + ) + error_handler.setFormatter( + StructuredFormatter('talk2me', self.environment) + ) + error_logger.addHandler(error_handler) + + # Also send errors to syslog if available + try: + syslog_handler = logging.handlers.SysLogHandler( + address='/dev/log' if os.path.exists('/dev/log') else ('localhost', 514) + ) + syslog_handler.setFormatter( + logging.Formatter('talk2me[%(process)d]: %(levelname)s %(message)s') + ) + error_logger.addHandler(syslog_handler) + except Exception: + pass # Syslog not available + + self.loggers['error'] = error_logger + + def _setup_access_logger(self): + """Set up access logger for HTTP requests""" + access_logger = logging.getLogger('talk2me.access') + access_logger.setLevel(logging.INFO) + + # Access log file + access_handler = logging.handlers.TimedRotatingFileHandler( + os.path.join(LOGS_DIR, 'access.log'), + when='midnight', + interval=1, + backupCount=30 + ) + access_handler.setFormatter( + StructuredFormatter('talk2me', self.environment) + ) + access_logger.addHandler(access_handler) + + self.loggers['access'] = access_logger + + def _setup_security_logger(self): + """Set up security event logger""" + security_logger = logging.getLogger('talk2me.security') + security_logger.setLevel(logging.WARNING) + + # Security log file + security_handler = logging.handlers.RotatingFileHandler( + os.path.join(LOGS_DIR, 'security.log'), + maxBytes=self.max_bytes, + backupCount=self.backup_count + ) + security_handler.setFormatter( + StructuredFormatter('talk2me', self.environment) + ) + security_logger.addHandler(security_handler) + + self.loggers['security'] = security_logger + + def _setup_performance_logger(self): + """Set up performance metrics logger""" + perf_logger = logging.getLogger('talk2me.performance') + perf_logger.setLevel(logging.INFO) + + # Performance log file + perf_handler = logging.handlers.TimedRotatingFileHandler( + os.path.join(LOGS_DIR, 'performance.log'), + when='H', # Hourly rotation + interval=1, + backupCount=168 # 7 days + ) + perf_handler.setFormatter( + StructuredFormatter('talk2me', self.environment) + ) + perf_logger.addHandler(perf_handler) + + self.loggers['performance'] = perf_logger + + def _setup_flask_handlers(self, app): + """Set up Flask error handlers""" + @app.errorhandler(Exception) + def handle_exception(error): + # Get request ID + request_id = getattr(g, 'request_id', 'unknown') + + # Create error signature for deduplication + error_signature = self._get_error_signature(error) + + # Log the error + self.log_error( + error, + request_id=request_id, + endpoint=request.endpoint, + method=request.method, + path=request.path, + ip=request.remote_addr, + user_agent=request.headers.get('User-Agent'), + signature=error_signature + ) + + # Track error frequency + self._track_error(error_signature) + + # Return appropriate response + if hasattr(error, 'code'): + return {'error': str(error)}, error.code + else: + return {'error': 'Internal server error'}, 500 + + def _before_request(self): + """Log request start""" + # Generate request ID + g.request_id = self._generate_request_id() + g.request_start_time = time.time() + + # Log access + self.log_access( + 'request_start', + request_id=g.request_id, + method=request.method, + path=request.path, + ip=request.remote_addr, + user_agent=request.headers.get('User-Agent') + ) + + def _after_request(self, response): + """Log request completion""" + # Calculate duration + duration = None + if hasattr(g, 'request_start_time'): + duration = int((time.time() - g.request_start_time) * 1000) + + # Log access + self.log_access( + 'request_complete', + request_id=getattr(g, 'request_id', 'unknown'), + method=request.method, + path=request.path, + status=response.status_code, + duration_ms=duration, + content_length=response.content_length + ) + + # Log performance metrics for slow requests + if duration and duration > 1000: # Over 1 second + self.log_performance( + 'slow_request', + request_id=getattr(g, 'request_id', 'unknown'), + endpoint=request.endpoint, + duration_ms=duration + ) + + return response + + def log_error(self, error: Exception, **kwargs): + """Log an error with context""" + error_logger = self.loggers.get('error', logging.getLogger()) + + # Create log record with extra fields + extra = { + 'extra_fields': kwargs, + 'request_id': kwargs.get('request_id'), + 'user_id': kwargs.get('user_id'), + 'session_id': kwargs.get('session_id') + } + + # Log with full traceback + error_logger.error( + f"Error in {kwargs.get('endpoint', 'unknown')}: {str(error)}", + exc_info=sys.exc_info(), + extra=extra + ) + + def log_access(self, message: str, **kwargs): + """Log access event""" + access_logger = self.loggers.get('access', logging.getLogger()) + + extra = { + 'extra_fields': kwargs, + 'request_id': kwargs.get('request_id') + } + + access_logger.info(message, extra=extra) + + def log_security(self, event: str, severity: str = 'warning', **kwargs): + """Log security event""" + security_logger = self.loggers.get('security', logging.getLogger()) + + extra = { + 'extra_fields': { + 'event': event, + 'severity': severity, + **kwargs + }, + 'request_id': kwargs.get('request_id') + } + + log_method = getattr(security_logger, severity.lower(), security_logger.warning) + log_method(f"Security event: {event}", extra=extra) + + def log_performance(self, metric: str, value: Union[int, float] = None, **kwargs): + """Log performance metric""" + perf_logger = self.loggers.get('performance', logging.getLogger()) + + extra = { + 'extra_fields': { + 'metric': metric, + 'value': value, + **kwargs + }, + 'request_id': kwargs.get('request_id') + } + + perf_logger.info(f"Performance metric: {metric}", extra=extra) + + def _generate_request_id(self): + """Generate unique request ID""" + return f"{int(time.time() * 1000)}-{os.urandom(8).hex()}" + + def _get_error_signature(self, error: Exception): + """Generate signature for error deduplication""" + # Create signature from error type and key parts of traceback + tb_summary = traceback.format_exception_only(type(error), error) + signature_data = f"{type(error).__name__}:{tb_summary[0] if tb_summary else ''}" + return hashlib.md5(signature_data.encode()).hexdigest() + + def _track_error(self, signature: str): + """Track error frequency""" + now = time.time() + + if signature not in self.error_counts: + self.error_counts[signature] = [] + + # Add current timestamp + self.error_counts[signature].append(now) + + # Clean old entries (keep last hour) + self.error_counts[signature] = [ + ts for ts in self.error_counts[signature] + if now - ts < 3600 + ] + + # Alert if error rate is high + error_count = len(self.error_counts[signature]) + if error_count > 10: # More than 10 in an hour + self.log_security( + 'high_error_rate', + severity='error', + signature=signature, + count=error_count, + message="High error rate detected" + ) + + def get_error_summary(self): + """Get summary of recent errors""" + summary = {} + now = time.time() + + for signature, timestamps in self.error_counts.items(): + recent_count = len([ts for ts in timestamps if now - ts < 3600]) + if recent_count > 0: + summary[signature] = { + 'count_last_hour': recent_count, + 'last_seen': max(timestamps) + } + + return summary + +# Decorators for easy logging +def log_errors(logger_name='talk2me'): + """Decorator to log function errors""" + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + logger = logging.getLogger(logger_name) + logger.error( + f"Error in {func.__name__}: {str(e)}", + exc_info=sys.exc_info(), + extra={ + 'extra_fields': { + 'function': func.__name__, + 'module': func.__module__ + } + } + ) + raise + return wrapper + return decorator + +def log_performance(metric_name=None): + """Decorator to log function performance""" + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + start_time = time.time() + + try: + result = func(*args, **kwargs) + duration = int((time.time() - start_time) * 1000) + + # Log performance + logger = logging.getLogger('talk2me.performance') + logger.info( + f"Performance: {metric_name or func.__name__}", + extra={ + 'extra_fields': { + 'metric': metric_name or func.__name__, + 'duration_ms': duration, + 'function': func.__name__, + 'module': func.__module__ + } + } + ) + + return result + except Exception: + duration = int((time.time() - start_time) * 1000) + logger = logging.getLogger('talk2me.performance') + logger.warning( + f"Performance (failed): {metric_name or func.__name__}", + extra={ + 'extra_fields': { + 'metric': metric_name or func.__name__, + 'duration_ms': duration, + 'function': func.__name__, + 'module': func.__module__, + 'status': 'failed' + } + } + ) + raise + return wrapper + return decorator + +@contextmanager +def log_context(**kwargs): + """Context manager to add context to logs""" + # Store current context + old_context = {} + for key, value in kwargs.items(): + if hasattr(g, key): + old_context[key] = getattr(g, key) + setattr(g, key, value) + + try: + yield + finally: + # Restore old context + for key in kwargs: + if key in old_context: + setattr(g, key, old_context[key]) + else: + delattr(g, key) + +# Utility functions +def configure_logging(app, **kwargs): + """Configure logging for the application""" + config = { + 'log_level': kwargs.get('log_level', app.config.get('LOG_LEVEL', 'INFO')), + 'log_file': kwargs.get('log_file', app.config.get('LOG_FILE')), + 'error_log_file': kwargs.get('error_log_file'), + 'max_bytes': kwargs.get('max_bytes', 50 * 1024 * 1024), + 'backup_count': kwargs.get('backup_count', 10) + } + + error_logger = ErrorLogger(app, config) + return error_logger + +def get_logger(name='talk2me'): + """Get a logger instance""" + return logging.getLogger(name) + +def log_exception(error, message=None, **kwargs): + """Log an exception with context""" + logger = logging.getLogger('talk2me.errors') + + extra = { + 'extra_fields': kwargs, + 'request_id': getattr(g, 'request_id', None) + } + + logger.error( + message or f"Exception: {str(error)}", + exc_info=(type(error), error, error.__traceback__), + extra=extra + ) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index de88e6d..bb633e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ pywebpush cryptography python-dotenv click +colorlog diff --git a/test_error_logging.py b/test_error_logging.py new file mode 100755 index 0000000..0149a19 --- /dev/null +++ b/test_error_logging.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +""" +Test script for error logging system +""" +import logging +import json +import os +import time +from error_logger import ErrorLogger, log_errors, log_performance, get_logger + +def test_basic_logging(): + """Test basic logging functionality""" + print("\n=== Testing Basic Logging ===") + + # Get logger + logger = get_logger('test') + + # Test different log levels + logger.debug("This is a debug message") + logger.info("This is an info message") + logger.warning("This is a warning message") + logger.error("This is an error message") + + print("✓ Basic logging test completed") + +def test_error_logging(): + """Test error logging with exceptions""" + print("\n=== Testing Error Logging ===") + + @log_errors('test.functions') + def failing_function(): + raise ValueError("This is a test error") + + try: + failing_function() + except ValueError: + print("✓ Error was logged") + + # Check if error log exists + if os.path.exists('logs/errors.log'): + print("✓ Error log file created") + + # Read last line + with open('logs/errors.log', 'r') as f: + lines = f.readlines() + if lines: + try: + error_entry = json.loads(lines[-1]) + print(f"✓ Error logged with level: {error_entry.get('level')}") + print(f"✓ Error type: {error_entry.get('exception', {}).get('type')}") + except json.JSONDecodeError: + print("✗ Error log entry is not valid JSON") + else: + print("✗ Error log file not created") + +def test_performance_logging(): + """Test performance logging""" + print("\n=== Testing Performance Logging ===") + + @log_performance('test_operation') + def slow_function(): + time.sleep(0.1) # Simulate slow operation + return "result" + + result = slow_function() + print(f"✓ Function returned: {result}") + + # Check performance log + if os.path.exists('logs/performance.log'): + print("✓ Performance log file created") + + # Read last line + with open('logs/performance.log', 'r') as f: + lines = f.readlines() + if lines: + try: + perf_entry = json.loads(lines[-1]) + duration = perf_entry.get('extra_fields', {}).get('duration_ms', 0) + print(f"✓ Performance logged with duration: {duration}ms") + except json.JSONDecodeError: + print("✗ Performance log entry is not valid JSON") + else: + print("✗ Performance log file not created") + +def test_structured_logging(): + """Test structured logging format""" + print("\n=== Testing Structured Logging ===") + + logger = get_logger('test.structured') + + # Log with extra fields + logger.info("Structured log test", extra={ + 'extra_fields': { + 'user_id': 123, + 'action': 'test_action', + 'metadata': {'key': 'value'} + } + }) + + # Check main log + if os.path.exists('logs/talk2me.log'): + with open('logs/talk2me.log', 'r') as f: + lines = f.readlines() + if lines: + try: + # Find our test entry + for line in reversed(lines): + entry = json.loads(line) + if entry.get('message') == 'Structured log test': + print("✓ Structured log entry found") + print(f"✓ Contains timestamp: {'timestamp' in entry}") + print(f"✓ Contains hostname: {'hostname' in entry}") + print(f"✓ Contains extra fields: {'user_id' in entry}") + break + except json.JSONDecodeError: + print("✗ Log entry is not valid JSON") + +def test_log_rotation(): + """Test log rotation settings""" + print("\n=== Testing Log Rotation ===") + + # Check if log files exist and their sizes + log_files = { + 'talk2me.log': 'logs/talk2me.log', + 'errors.log': 'logs/errors.log', + 'access.log': 'logs/access.log', + 'security.log': 'logs/security.log', + 'performance.log': 'logs/performance.log' + } + + for name, path in log_files.items(): + if os.path.exists(path): + size = os.path.getsize(path) + print(f"✓ {name}: {size} bytes") + else: + print(f"- {name}: not created yet") + +def main(): + """Run all tests""" + print("Error Logging System Tests") + print("==========================") + + # Create a test Flask app + from flask import Flask + app = Flask(__name__) + app.config['LOG_LEVEL'] = 'DEBUG' + app.config['FLASK_ENV'] = 'testing' + + # Initialize error logger + error_logger = ErrorLogger(app) + + # Run tests + test_basic_logging() + test_error_logging() + test_performance_logging() + test_structured_logging() + test_log_rotation() + + print("\n✅ All tests completed!") + print("\nCheck the logs directory for generated log files:") + print("- logs/talk2me.log - Main application log") + print("- logs/errors.log - Error log with stack traces") + print("- logs/performance.log - Performance metrics") + print("- logs/access.log - HTTP access log") + print("- logs/security.log - Security events") + +if __name__ == "__main__": + main() \ No newline at end of file