Implement proper error logging - Critical for debugging production issues

This comprehensive error logging system provides structured logging, automatic rotation, and detailed tracking for production debugging.

Key features:
- Structured JSON logging for easy parsing and analysis
- Multiple log streams: app, errors, access, security, performance
- Automatic log rotation to prevent disk space exhaustion
- Request tracing with unique IDs for debugging
- Performance metrics collection with slow request tracking
- Security event logging for suspicious activities
- Error deduplication and frequency tracking
- Full exception details with stack traces

Implementation details:
- StructuredFormatter outputs JSON-formatted logs
- ErrorLogger manages multiple specialized loggers
- Rotating file handlers prevent disk space issues
- Request context automatically included in logs
- Performance decorator tracks execution times
- Security events logged for audit trails
- Admin endpoints for log analysis

Admin endpoints:
- GET /admin/logs/errors - View recent errors and frequencies
- GET /admin/logs/performance - View performance metrics
- GET /admin/logs/security - View security events

Log types:
- talk2me.log - General application logs
- errors.log - Dedicated error logging with stack traces
- access.log - HTTP request/response logs
- security.log - Security events and suspicious activities
- performance.log - Performance metrics and timing

This provides production-grade observability critical for debugging issues, monitoring performance, and maintaining security in production environments.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Commit metadata: commit 92b7c41f61 (parent aec2d3b0aa), authored 2025-06-03 08:11:26 -06:00.
6 changed files with 1417 additions and 4 deletions.

Diff shown below: app.py (216 lines changed).

@@ -37,6 +37,7 @@ from config import init_app as init_config
from secrets_manager import init_app as init_secrets
from session_manager import init_app as init_session_manager, track_resource
from request_size_limiter import RequestSizeLimiter, limit_request_size
from error_logger import ErrorLogger, log_errors, log_performance, log_exception, get_logger
# Error boundary decorator for Flask routes
def with_error_boundary(func):
    """Wrap a Flask view so unhandled exceptions become a JSON 500 response.

    On failure this logs the exception with full request context via
    log_exception, emits a security event when the error message looks
    suspicious, and returns a sanitized payload (the real message is only
    exposed when app.debug is set). The wrapped view keeps its original
    __name__ so Flask endpoint names stay unique.
    """
    from functools import wraps  # NOTE(review): hoist to top-of-file imports

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Log the error with full request context for debugging
            log_exception(
                e,
                message=f"Error in {func.__name__}",
                endpoint=request.endpoint,
                method=request.method,
                path=request.path,
                ip=request.remote_addr,
                function=func.__name__,
                module=func.__module__
            )

            # Flag errors whose message suggests an attack for the audit trail
            suspicious_keywords = ['inject', 'attack', 'malicious', 'unauthorized']
            if any(keyword in str(e).lower() for keyword in suspicious_keywords):
                app.error_logger.log_security(
                    'suspicious_error',
                    severity='warning',
                    error_type=type(e).__name__,
                    error_message=str(e),
                    endpoint=request.endpoint,
                    ip=request.remote_addr
                )

            # Never leak internal details outside debug mode
            error_message = str(e) if app.debug else "An internal error occurred"
            return jsonify({
                'success': False,
                'error': error_message,
                'component': func.__name__,
                'request_id': getattr(g, 'request_id', None)
            }), 500
    return wrapper
@@ -119,6 +140,18 @@ request_size_limiter = RequestSizeLimiter(app, {
'max_image_size': app.config.get('MAX_IMAGE_SIZE', 10 * 1024 * 1024), # 10MB for images
})
# --- Structured error logging --------------------------------------------
# Pull logging knobs from app config, falling back to production defaults.
_log_settings = {
    'log_level': app.config.get('LOG_LEVEL', 'INFO'),
    'log_file': app.config.get('LOG_FILE', 'logs/talk2me.log'),
    'error_log_file': app.config.get('ERROR_LOG_FILE', 'logs/errors.log'),
    'max_bytes': app.config.get('LOG_MAX_BYTES', 50 * 1024 * 1024),  # 50MB per file
    'backup_count': app.config.get('LOG_BACKUP_COUNT', 10),  # rotated files kept
}
error_logger = ErrorLogger(app, _log_settings)

# Route this module's log records through the structured logging system.
logger = get_logger(__name__)
# TTS configuration is already loaded from config.py
# Warn if TTS API key is not set
if not app.config.get('TTS_API_KEY'):
@@ -604,6 +637,7 @@ def index():
@limit_request_size(max_audio_size=25 * 1024 * 1024) # 25MB limit for audio
@with_error_boundary
@track_resource('audio_file')
@log_performance('transcribe_audio')
def transcribe():
if 'audio' not in request.files:
return jsonify({'error': 'No audio file provided'}), 400
@@ -719,6 +753,7 @@ def transcribe():
@rate_limit(requests_per_minute=20, requests_per_hour=300, check_size=True)
@limit_request_size(max_size=1 * 1024 * 1024) # 1MB limit for JSON
@with_error_boundary
@log_performance('translate_text')
def translate():
try:
# Validate request size
@@ -1589,5 +1624,178 @@ def update_size_limits():
logger.error(f"Failed to update size limits: {str(e)}")
return jsonify({'error': str(e)}), 500
@app.route('/admin/logs/errors', methods=['GET'])
@rate_limit(requests_per_minute=10)
def get_error_logs():
    """Admin endpoint: return recent error entries and error frequencies.

    Auth: the X-Admin-Token header must match the configured ADMIN_TOKEN.
    The original fell back to a hard-coded 'default-admin-token', which is
    an authentication bypass when the operator never sets ADMIN_TOKEN, and
    compared tokens with `!=` (timing side channel); both are fixed here.

    Returns JSON with the ErrorLogger's aggregated summary plus up to the
    last 50 structured entries parsed from the error log file.
    """
    import hmac  # local: constant-time token comparison

    expected_token = app.config.get('ADMIN_TOKEN')
    auth_token = request.headers.get('X-Admin-Token', '')
    # Reject when no token is configured — never accept a baked-in default.
    if not expected_token or not hmac.compare_digest(auth_token, expected_token):
        return jsonify({'error': 'Unauthorized'}), 401

    if not hasattr(app, 'error_logger'):
        return jsonify({'error': 'Error logger not initialized'}), 500

    try:
        error_summary = app.error_logger.get_error_summary()

        # Parse the tail of the error log; each line is one JSON record.
        recent_errors = []
        error_log_path = app.error_logger.error_log_file
        if os.path.exists(error_log_path):
            try:
                with open(error_log_path, 'r') as f:
                    lines = f.readlines()
                for line in lines[-100:]:
                    try:
                        recent_errors.append(json.loads(line))
                    except json.JSONDecodeError:
                        # Skip partial/corrupt lines (e.g. written mid-rotation)
                        pass
            except OSError as e:
                logger.error(f"Failed to read error log: {e}")

        return jsonify({
            'error_summary': error_summary,
            'recent_errors': recent_errors[-50:],  # Last 50 errors
            'total_errors_logged': len(recent_errors)
        })
    except Exception as e:
        logger.error(f"Failed to get error logs: {str(e)}")
        # Do not echo internal exception text to the client
        return jsonify({'error': 'Failed to retrieve error logs'}), 500
@app.route('/admin/logs/performance', methods=['GET'])
@rate_limit(requests_per_minute=10)
def get_performance_logs():
    """Admin endpoint: aggregate performance metrics from the perf log.

    Auth: the X-Admin-Token header must match the configured ADMIN_TOKEN
    (constant-time compare; no hard-coded fallback token — see
    get_error_logs for the rationale).

    Scans the last 1000 JSON lines of logs/performance.log, computes
    per-metric count/avg/min/max response times, and reports the 20
    slowest requests (over 1 second).
    """
    import hmac  # local: constant-time token comparison

    expected_token = app.config.get('ADMIN_TOKEN')
    auth_token = request.headers.get('X-Admin-Token', '')
    if not expected_token or not hmac.compare_digest(auth_token, expected_token):
        return jsonify({'error': 'Unauthorized'}), 401

    try:
        perf_log_path = 'logs/performance.log'
        slow_threshold_ms = 1000  # requests slower than this are reported
        endpoints = {}
        slow_requests = []

        if os.path.exists(perf_log_path):
            try:
                with open(perf_log_path, 'r') as f:
                    lines = f.readlines()
                for line in lines[-1000:]:  # Last 1000 entries
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if 'extra_fields' not in entry:
                        continue
                    metric = entry['extra_fields'].get('metric', '')
                    duration = entry['extra_fields'].get('duration_ms', 0)

                    # Accumulate per-metric stats
                    stats = endpoints.setdefault(metric, {
                        'count': 0,
                        'total_duration': 0,
                        'max_duration': 0,
                        'min_duration': float('inf')
                    })
                    stats['count'] += 1
                    stats['total_duration'] += duration
                    stats['max_duration'] = max(stats['max_duration'], duration)
                    stats['min_duration'] = min(stats['min_duration'], duration)

                    if duration > slow_threshold_ms:
                        slow_requests.append({
                            'metric': metric,
                            'duration_ms': duration,
                            'timestamp': entry.get('timestamp')
                        })
            except OSError as e:
                logger.error(f"Failed to read performance log: {e}")

        # Derive averages once all entries are tallied
        average_response_times = {}
        for endpoint, stats in endpoints.items():
            if stats['count'] > 0:
                average_response_times[endpoint] = {
                    'avg_ms': stats['total_duration'] / stats['count'],
                    'max_ms': stats['max_duration'],
                    'min_ms': stats['min_duration'] if stats['min_duration'] != float('inf') else 0,
                    'count': stats['count']
                }

        slow_requests.sort(key=lambda x: x['duration_ms'], reverse=True)
        return jsonify({
            'performance_metrics': average_response_times,
            'slow_requests': slow_requests[:20]  # Top 20 slowest
        })
    except Exception as e:
        logger.error(f"Failed to get performance logs: {str(e)}")
        # Do not echo internal exception text to the client
        return jsonify({'error': 'Failed to retrieve performance logs'}), 500
@app.route('/admin/logs/security', methods=['GET'])
@rate_limit(requests_per_minute=10)
def get_security_logs():
    """Admin endpoint: return recent security events grouped by type.

    Auth: the X-Admin-Token header must match the configured ADMIN_TOKEN
    (constant-time compare; no hard-coded fallback token — see
    get_error_logs for the rationale).

    Reads the last 200 JSON lines of logs/security.log, returns the last
    50 raw events plus per-event-type counts.
    """
    import hmac  # local: constant-time token comparison

    expected_token = app.config.get('ADMIN_TOKEN')
    auth_token = request.headers.get('X-Admin-Token', '')
    if not expected_token or not hmac.compare_digest(auth_token, expected_token):
        return jsonify({'error': 'Unauthorized'}), 401

    try:
        security_log_path = 'logs/security.log'
        security_events = []
        if os.path.exists(security_log_path):
            try:
                with open(security_log_path, 'r') as f:
                    lines = f.readlines()
                for line in lines[-200:]:  # Last 200 entries
                    try:
                        security_events.append(json.loads(line))
                    except json.JSONDecodeError:
                        # Skip partial/corrupt lines (e.g. written mid-rotation)
                        pass
            except OSError as e:
                logger.error(f"Failed to read security log: {e}")

        # Frequency of each event type for a quick at-a-glance summary
        event_summary = {}
        for event in security_events:
            if 'extra_fields' in event:
                event_type = event['extra_fields'].get('event', 'unknown')
                event_summary[event_type] = event_summary.get(event_type, 0) + 1

        return jsonify({
            'security_events': security_events[-50:],  # Last 50 events
            'event_summary': event_summary,
            'total_events': len(security_events)
        })
    except Exception as e:
        logger.error(f"Failed to get security logs: {str(e)}")
        # Do not echo internal exception text to the client
        return jsonify({'error': 'Failed to retrieve security logs'}), 500
if __name__ == '__main__':
    # Never hard-code debug=True on a 0.0.0.0 bind: the Werkzeug debugger
    # allows remote code execution. Opt in explicitly per environment.
    debug_mode = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=5005, debug=debug_mode)