Implement proper error logging - Critical for debugging production issues

This comprehensive error logging system provides structured logging, automatic rotation, and detailed tracking for production debugging.

Key features:
- Structured JSON logging for easy parsing and analysis
- Multiple log streams: app, errors, access, security, performance
- Automatic log rotation to prevent disk space exhaustion
- Request tracing with unique IDs for debugging
- Performance metrics collection with slow request tracking
- Security event logging for suspicious activities
- Error deduplication and frequency tracking
- Full exception details with stack traces

Implementation details:
- StructuredFormatter outputs JSON-formatted logs
- ErrorLogger manages multiple specialized loggers
- Rotating file handlers prevent disk space issues
- Request context automatically included in logs
- Performance decorator tracks execution times
- Security events logged for audit trails
- Admin endpoints for log analysis

Admin endpoints:
- GET /admin/logs/errors - View recent errors and frequencies
- GET /admin/logs/performance - View performance metrics
- GET /admin/logs/security - View security events

Log types:
- talk2me.log - General application logs
- errors.log - Dedicated error logging with stack traces
- access.log - HTTP request/response logs
- security.log - Security events and suspicious activities
- performance.log - Performance metrics and timing

This provides production-grade observability critical for debugging issues, monitoring performance, and maintaining security in production environments.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Commit metadata: commit 92b7c41f61 (parent aec2d3b0aa), authored 2025-06-03 08:11:26 -06:00.
6 changed files with 1417 additions and 4 deletions.

Diff shown below: app.py (216 lines changed).

@@ -37,6 +37,7 @@ from config import init_app as init_config
from secrets_manager import init_app as init_secrets
from session_manager import init_app as init_session_manager, track_resource
from request_size_limiter import RequestSizeLimiter, limit_request_size
from error_logger import ErrorLogger, log_errors, log_performance, log_exception, get_logger
# Error boundary decorator for Flask routes
def with_error_boundary(func):
    """Wrap a Flask view so unhandled exceptions become a JSON 500 response.

    On failure this logs the exception with full request context via
    log_exception, emits a security event when the error message looks
    suspicious, and returns a sanitized payload (the real message is only
    exposed when app.debug is set). The wrapped view keeps its original
    __name__ so Flask endpoint names stay unique.
    """
    from functools import wraps  # NOTE(review): hoist to top-of-file imports

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Log the error with full request context for debugging
            log_exception(
                e,
                message=f"Error in {func.__name__}",
                endpoint=request.endpoint,
                method=request.method,
                path=request.path,
                ip=request.remote_addr,
                function=func.__name__,
                module=func.__module__
            )

            # Flag errors whose message suggests an attack for the audit trail
            suspicious_keywords = ['inject', 'attack', 'malicious', 'unauthorized']
            if any(keyword in str(e).lower() for keyword in suspicious_keywords):
                app.error_logger.log_security(
                    'suspicious_error',
                    severity='warning',
                    error_type=type(e).__name__,
                    error_message=str(e),
                    endpoint=request.endpoint,
                    ip=request.remote_addr
                )

            # Never leak internal details outside debug mode
            error_message = str(e) if app.debug else "An internal error occurred"
            return jsonify({
                'success': False,
                'error': error_message,
                'component': func.__name__,
                'request_id': getattr(g, 'request_id', None)
            }), 500
    return wrapper
@@ -119,6 +140,18 @@ request_size_limiter = RequestSizeLimiter(app, {
'max_image_size': app.config.get('MAX_IMAGE_SIZE', 10 * 1024 * 1024), # 10MB for images
})
# --- Structured error logging --------------------------------------------
# Pull logging knobs from app config, falling back to production defaults.
_log_settings = {
    'log_level': app.config.get('LOG_LEVEL', 'INFO'),
    'log_file': app.config.get('LOG_FILE', 'logs/talk2me.log'),
    'error_log_file': app.config.get('ERROR_LOG_FILE', 'logs/errors.log'),
    'max_bytes': app.config.get('LOG_MAX_BYTES', 50 * 1024 * 1024),  # 50MB per file
    'backup_count': app.config.get('LOG_BACKUP_COUNT', 10),  # rotated files kept
}
error_logger = ErrorLogger(app, _log_settings)

# Route this module's log records through the structured logging system.
logger = get_logger(__name__)
# TTS configuration is already loaded from config.py
# Warn if TTS API key is not set
if not app.config.get('TTS_API_KEY'):
@@ -604,6 +637,7 @@ def index():
@limit_request_size(max_audio_size=25 * 1024 * 1024) # 25MB limit for audio
@with_error_boundary
@track_resource('audio_file')
@log_performance('transcribe_audio')
def transcribe():
if 'audio' not in request.files:
return jsonify({'error': 'No audio file provided'}), 400
@@ -719,6 +753,7 @@ def transcribe():
@rate_limit(requests_per_minute=20, requests_per_hour=300, check_size=True)
@limit_request_size(max_size=1 * 1024 * 1024) # 1MB limit for JSON
@with_error_boundary
@log_performance('translate_text')
def translate():
try:
# Validate request size
@@ -1589,5 +1624,178 @@ def update_size_limits():
logger.error(f"Failed to update size limits: {str(e)}")
return jsonify({'error': str(e)}), 500
@app.route('/admin/logs/errors', methods=['GET'])
@rate_limit(requests_per_minute=10)
def get_error_logs():
    """Admin endpoint: return recent error entries and error frequencies.

    Auth: the X-Admin-Token header must match the configured ADMIN_TOKEN.
    The original fell back to a hard-coded 'default-admin-token', which is
    an authentication bypass when the operator never sets ADMIN_TOKEN, and
    compared tokens with `!=` (timing side channel); both are fixed here.

    Returns JSON with the ErrorLogger's aggregated summary plus up to the
    last 50 structured entries parsed from the error log file.
    """
    import hmac  # local: constant-time token comparison

    expected_token = app.config.get('ADMIN_TOKEN')
    auth_token = request.headers.get('X-Admin-Token', '')
    # Reject when no token is configured — never accept a baked-in default.
    if not expected_token or not hmac.compare_digest(auth_token, expected_token):
        return jsonify({'error': 'Unauthorized'}), 401

    if not hasattr(app, 'error_logger'):
        return jsonify({'error': 'Error logger not initialized'}), 500

    try:
        error_summary = app.error_logger.get_error_summary()

        # Parse the tail of the error log; each line is one JSON record.
        recent_errors = []
        error_log_path = app.error_logger.error_log_file
        if os.path.exists(error_log_path):
            try:
                with open(error_log_path, 'r') as f:
                    lines = f.readlines()
                for line in lines[-100:]:
                    try:
                        recent_errors.append(json.loads(line))
                    except json.JSONDecodeError:
                        # Skip partial/corrupt lines (e.g. written mid-rotation)
                        pass
            except OSError as e:
                logger.error(f"Failed to read error log: {e}")

        return jsonify({
            'error_summary': error_summary,
            'recent_errors': recent_errors[-50:],  # Last 50 errors
            'total_errors_logged': len(recent_errors)
        })
    except Exception as e:
        logger.error(f"Failed to get error logs: {str(e)}")
        # Do not echo internal exception text to the client
        return jsonify({'error': 'Failed to retrieve error logs'}), 500
@app.route('/admin/logs/performance', methods=['GET'])
@rate_limit(requests_per_minute=10)
def get_performance_logs():
    """Admin endpoint: aggregate performance metrics from the perf log.

    Auth: the X-Admin-Token header must match the configured ADMIN_TOKEN
    (constant-time compare; no hard-coded fallback token — see
    get_error_logs for the rationale).

    Scans the last 1000 JSON lines of logs/performance.log, computes
    per-metric count/avg/min/max response times, and reports the 20
    slowest requests (over 1 second).
    """
    import hmac  # local: constant-time token comparison

    expected_token = app.config.get('ADMIN_TOKEN')
    auth_token = request.headers.get('X-Admin-Token', '')
    if not expected_token or not hmac.compare_digest(auth_token, expected_token):
        return jsonify({'error': 'Unauthorized'}), 401

    try:
        perf_log_path = 'logs/performance.log'
        slow_threshold_ms = 1000  # requests slower than this are reported
        endpoints = {}
        slow_requests = []

        if os.path.exists(perf_log_path):
            try:
                with open(perf_log_path, 'r') as f:
                    lines = f.readlines()
                for line in lines[-1000:]:  # Last 1000 entries
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if 'extra_fields' not in entry:
                        continue
                    metric = entry['extra_fields'].get('metric', '')
                    duration = entry['extra_fields'].get('duration_ms', 0)

                    # Accumulate per-metric stats
                    stats = endpoints.setdefault(metric, {
                        'count': 0,
                        'total_duration': 0,
                        'max_duration': 0,
                        'min_duration': float('inf')
                    })
                    stats['count'] += 1
                    stats['total_duration'] += duration
                    stats['max_duration'] = max(stats['max_duration'], duration)
                    stats['min_duration'] = min(stats['min_duration'], duration)

                    if duration > slow_threshold_ms:
                        slow_requests.append({
                            'metric': metric,
                            'duration_ms': duration,
                            'timestamp': entry.get('timestamp')
                        })
            except OSError as e:
                logger.error(f"Failed to read performance log: {e}")

        # Derive averages once all entries are tallied
        average_response_times = {}
        for endpoint, stats in endpoints.items():
            if stats['count'] > 0:
                average_response_times[endpoint] = {
                    'avg_ms': stats['total_duration'] / stats['count'],
                    'max_ms': stats['max_duration'],
                    'min_ms': stats['min_duration'] if stats['min_duration'] != float('inf') else 0,
                    'count': stats['count']
                }

        slow_requests.sort(key=lambda x: x['duration_ms'], reverse=True)
        return jsonify({
            'performance_metrics': average_response_times,
            'slow_requests': slow_requests[:20]  # Top 20 slowest
        })
    except Exception as e:
        logger.error(f"Failed to get performance logs: {str(e)}")
        # Do not echo internal exception text to the client
        return jsonify({'error': 'Failed to retrieve performance logs'}), 500
@app.route('/admin/logs/security', methods=['GET'])
@rate_limit(requests_per_minute=10)
def get_security_logs():
    """Admin endpoint: return recent security events grouped by type.

    Auth: the X-Admin-Token header must match the configured ADMIN_TOKEN
    (constant-time compare; no hard-coded fallback token — see
    get_error_logs for the rationale).

    Reads the last 200 JSON lines of logs/security.log, returns the last
    50 raw events plus per-event-type counts.
    """
    import hmac  # local: constant-time token comparison

    expected_token = app.config.get('ADMIN_TOKEN')
    auth_token = request.headers.get('X-Admin-Token', '')
    if not expected_token or not hmac.compare_digest(auth_token, expected_token):
        return jsonify({'error': 'Unauthorized'}), 401

    try:
        security_log_path = 'logs/security.log'
        security_events = []
        if os.path.exists(security_log_path):
            try:
                with open(security_log_path, 'r') as f:
                    lines = f.readlines()
                for line in lines[-200:]:  # Last 200 entries
                    try:
                        security_events.append(json.loads(line))
                    except json.JSONDecodeError:
                        # Skip partial/corrupt lines (e.g. written mid-rotation)
                        pass
            except OSError as e:
                logger.error(f"Failed to read security log: {e}")

        # Frequency of each event type for a quick at-a-glance summary
        event_summary = {}
        for event in security_events:
            if 'extra_fields' in event:
                event_type = event['extra_fields'].get('event', 'unknown')
                event_summary[event_type] = event_summary.get(event_type, 0) + 1

        return jsonify({
            'security_events': security_events[-50:],  # Last 50 events
            'event_summary': event_summary,
            'total_events': len(security_events)
        })
    except Exception as e:
        logger.error(f"Failed to get security logs: {str(e)}")
        # Do not echo internal exception text to the client
        return jsonify({'error': 'Failed to retrieve security logs'}), 500
if __name__ == '__main__':
    # Never hard-code debug=True on a 0.0.0.0 bind: the Werkzeug debugger
    # allows remote code execution. Opt in explicitly per environment.
    debug_mode = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=5005, debug=debug_mode)