talk2me/request_size_limiter.py
Adolfo Delorenzo aec2d3b0aa Add request size limits - Prevents memory exhaustion from large uploads
This comprehensive request size limiting system prevents memory exhaustion and DoS attacks from oversized requests.

Key features:
- Global request size limit: 50MB (configurable)
- Type-specific limits: 25MB for audio, 1MB for JSON, 10MB for images
- Multi-layer validation before loading data into memory
- File type detection based on extensions
- Endpoint-specific limit enforcement
- Dynamic configuration via admin API
- Clear error messages with size information

Implementation details:
- RequestSizeLimiter middleware with Flask integration
- Pre-request validation using Content-Length header
- File size checking for multipart uploads
- JSON payload size validation
- Custom decorator for route-specific limits
- StreamSizeLimiter for chunked transfers
- Integration with Flask's MAX_CONTENT_LENGTH

Admin features:
- GET /admin/size-limits - View current limits
- POST /admin/size-limits - Update limits dynamically
- Human-readable size formatting in responses
- Size limit info in health check endpoints

Security benefits:
- Prevents memory exhaustion attacks
- Blocks oversized uploads before processing
- Protects against buffer overflow attempts
- Works with rate limiting for comprehensive protection

This addresses the critical security issue of unbounded request sizes that could lead to memory exhaustion or system crashes.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-03 00:58:14 -06:00

302 lines
12 KiB
Python

# Request size limiting middleware for preventing memory exhaustion
import logging
from functools import wraps
from flask import request, jsonify, current_app
import os
logger = logging.getLogger(__name__)

# Number of bytes in one mebibyte; every default below is a multiple of it.
_MIB = 1024 * 1024

# Default size limits, all expressed in bytes.
DEFAULT_LIMITS = {
    'max_content_length': 50 * _MIB,  # global cap applied to every request
    'max_audio_size': 25 * _MIB,      # uploaded audio files
    'max_json_size': 1 * _MIB,        # JSON request bodies
    'max_image_size': 10 * _MIB,      # uploaded image files
    'max_chunk_size': 1 * _MIB,       # chunk size for streaming
}

# Filename extensions (lower-case, leading dot included) used to classify an
# upload as audio or image. These are plain sets of extensions; no MIME types
# are involved.
AUDIO_EXTENSIONS = {
    '.wav',
    '.mp3',
    '.ogg',
    '.webm',
    '.m4a',
    '.flac',
    '.aac',
}
IMAGE_EXTENSIONS = {
    '.jpg',
    '.jpeg',
    '.png',
    '.gif',
    '.webp',
    '.bmp',
}
class RequestSizeLimiter:
    """
    Middleware to enforce request size limits and prevent memory exhaustion.

    Limits are byte counts stored in ``self.limits`` under the same keys as
    ``DEFAULT_LIMITS``. They are enforced from a Flask ``before_request`` hook
    (``check_request_size``) and can be changed at runtime with
    ``update_limits``.
    """

    def __init__(self, app=None, config=None):
        """
        Create the limiter and optionally bind it to a Flask application.

        Args:
            app: Flask application to initialise immediately, or None when
                ``init_app`` will be called later.
            config: Optional dict of limit overrides layered on top of
                ``DEFAULT_LIMITS``.
        """
        self.config = config or {}
        self.limits = {**DEFAULT_LIMITS, **self.config}
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Initialize the Flask application with size limiting."""
        # Let Flask/Werkzeug enforce the global cap as well.
        app.config['MAX_CONTENT_LENGTH'] = self.limits['max_content_length']
        # Expose the limiter on the app so other code can reach it.
        app.request_size_limiter = self
        app.before_request(self.check_request_size)
        app.register_error_handler(413, self.handle_413)
        logger.info(
            "Request size limiter initialized with max content length: %.1fMB",
            self.limits['max_content_length'] / 1024 / 1024,
        )

    @staticmethod
    def _rejection(error, max_size, your_size, **extra):
        """Build the ``(json_response, 413)`` tuple returned for a rejected request."""
        payload = {'error': error, 'max_size': max_size, 'your_size': your_size}
        payload.update(extra)
        return jsonify(payload), 413

    def _check_uploaded_file(self, file_obj):
        """Return a 413 response if an audio/image upload is over its limit, else None."""
        file_ext = os.path.splitext(file_obj.filename or '')[1].lower()
        if file_ext in AUDIO_EXTENSIONS:
            label, limit_key = 'Audio', 'max_audio_size'
        elif file_ext in IMAGE_EXTENSIONS:
            label, limit_key = 'Image', 'max_image_size'
        else:
            # Other file types are only subject to the global/endpoint limits.
            return None

        max_size = self.limits.get(limit_key, self.limits['max_content_length'])

        # Measure the upload by seeking to its end, then rewind so the view
        # function reads it from the start.
        file_obj.seek(0, os.SEEK_END)
        file_size = file_obj.tell()
        file_obj.seek(0)

        if file_size <= max_size:
            return None

        logger.warning(
            "%s file from %s exceeds limit: %s bytes",
            label, request.remote_addr, file_size,
        )
        return self._rejection(
            f'{label} file too large',
            max_size,
            file_size,
            max_size_mb=round(max_size / 1024 / 1024, 1),
        )

    def check_request_size(self):
        """
        Check request size before processing.

        Registered as a ``before_request`` hook.

        Returns:
            None to let the request proceed, or a ``(response, 413)`` tuple
            describing which limit was exceeded.
        """
        # Skip size check for GET, HEAD, OPTIONS
        if request.method in ('GET', 'HEAD', 'OPTIONS'):
            return None

        content_length = request.content_length
        if content_length is None:
            # No Content-Length header. Chunked bodies are only logged here;
            # this hook cannot know their size up front and does not measure
            # them. The header match is case-insensitive and tolerates a
            # codings list such as "gzip, chunked".
            transfer_encoding = request.headers.get('Transfer-Encoding', '')
            if 'chunked' in transfer_encoding.lower():
                logger.warning(
                    "Chunked request from %s to %s",
                    request.remote_addr, request.endpoint,
                )
            return None

        # Check against global limit
        global_limit = self.limits['max_content_length']
        if content_length > global_limit:
            logger.warning(
                "Request from %s exceeds global limit: %s bytes",
                request.remote_addr, content_length,
            )
            return self._rejection('Request too large', global_limit, content_length)

        # Check endpoint-specific limits
        endpoint = request.endpoint
        if endpoint:
            endpoint_limit = self.get_endpoint_limit(endpoint)
            if endpoint_limit and content_length > endpoint_limit:
                logger.warning(
                    "Request from %s to %s exceeds endpoint limit: %s bytes",
                    request.remote_addr, endpoint, content_length,
                )
                return self._rejection(
                    f'Request too large for {endpoint}',
                    endpoint_limit,
                    content_length,
                )

        # Check file-specific limits. ``multi=True`` is required: a plain
        # ``items()`` on the files multi-dict yields only the first file of
        # each field name, so additional files uploaded under a repeated
        # field would never be checked.
        if request.files:
            for _field, file_obj in request.files.items(multi=True):
                rejection = self._check_uploaded_file(file_obj)
                if rejection is not None:
                    return rejection

        # Check JSON payload size using the declared Content-Length, so the
        # body does not have to be buffered just to be measured.
        if request.is_json:
            max_json = self.limits.get('max_json_size', self.limits['max_content_length'])
            if content_length > max_json:
                logger.warning(
                    "JSON payload from %s exceeds limit: %s bytes",
                    request.remote_addr, content_length,
                )
                return self._rejection(
                    'JSON payload too large',
                    max_json,
                    content_length,
                    max_size_kb=round(max_json / 1024, 1),
                )

        return None

    def get_endpoint_limit(self, endpoint):
        """
        Get size limit for specific endpoint.

        Args:
            endpoint: Flask endpoint name.

        Returns:
            The byte limit for a known endpoint, or None when the endpoint has
            no dedicated limit.
        """
        endpoint_limits = {
            'transcribe': self.limits.get('max_audio_size', 25 * 1024 * 1024),
            'speak': self.limits.get('max_json_size', 1 * 1024 * 1024),
            'translate': self.limits.get('max_json_size', 1 * 1024 * 1024),
            'translate_stream': self.limits.get('max_json_size', 1 * 1024 * 1024),
        }
        return endpoint_limits.get(endpoint)

    def handle_413(self, error):
        """Handle 413 Request Entity Too Large errors with a JSON body."""
        logger.warning("413 error from %s: %s", request.remote_addr, error)
        global_limit = self.limits['max_content_length']
        return jsonify({
            'error': 'Request entity too large',
            'message': 'The request payload is too large. Please reduce the size and try again.',
            'max_size': global_limit,
            'max_size_mb': round(global_limit / 1024 / 1024, 1)
        }), 413

    def update_limits(self, **kwargs):
        """
        Update size limits dynamically.

        Args:
            **kwargs: Limit names mapped to their new values in bytes.

        Returns:
            A copy of the limits as they were before the update.

        Raises:
            ValueError: If any value is not a non-negative number. Nothing is
                changed in that case.
        """
        # Validate everything first so a bad value cannot leave the limiter
        # half-updated or make every later size comparison fail.
        for name, value in kwargs.items():
            is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
            if not is_number or not value >= 0:
                raise ValueError(f"Invalid size limit for {name!r}: {value!r}")

        old_limits = self.limits.copy()
        self.limits.update(kwargs)

        # Update Flask's MAX_CONTENT_LENGTH if changed
        if 'max_content_length' in kwargs and current_app:
            current_app.config['MAX_CONTENT_LENGTH'] = kwargs['max_content_length']

        logger.info("Updated size limits: %s", kwargs)
        return old_limits
def limit_request_size(**limit_kwargs):
    """
    Decorator to apply custom size limits to specific routes.

    Recognised keyword arguments:
        max_size: Maximum ``Content-Length`` in bytes. Defaults to
            ``DEFAULT_LIMITS['max_content_length']``. Requests that carry no
            ``Content-Length`` are not checked against it.
        max_audio_size: Maximum size in bytes of each uploaded file whose
            extension is in ``AUDIO_EXTENSIONS``. Only checked when given.

    The wrapped view returns a ``(json_response, 413)`` tuple instead of
    running when a limit is exceeded.

    Usage:
        @app.route('/upload')
        @limit_request_size(max_size=10*1024*1024)  # 10MB limit
        def upload():
            ...
    """
    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            # Check content length
            content_length = request.content_length
            max_size = limit_kwargs.get('max_size', DEFAULT_LIMITS['max_content_length'])
            if content_length and content_length > max_size:
                logger.warning(f"Request to {request.endpoint} exceeds custom limit: {content_length} bytes")
                return jsonify({
                    'error': 'Request too large',
                    'max_size': max_size,
                    'your_size': content_length,
                    'max_size_mb': round(max_size / 1024 / 1024, 1)
                }), 413

            # Check specific file types if specified
            if 'max_audio_size' in limit_kwargs and request.files:
                max_audio_size = limit_kwargs['max_audio_size']
                # ``multi=True`` walks every uploaded file. The plain
                # ``values()``/``items()`` views of the files multi-dict yield
                # only the first file per field name, which would let extra
                # files under a repeated field bypass the limit.
                for _field, file_obj in request.files.items(multi=True):
                    if not file_obj.filename:
                        continue
                    ext = os.path.splitext(file_obj.filename)[1].lower()
                    if ext not in AUDIO_EXTENSIONS:
                        continue

                    # Measure by seeking to the end, then rewind for the view.
                    file_obj.seek(0, os.SEEK_END)
                    file_size = file_obj.tell()
                    file_obj.seek(0)

                    if file_size > max_audio_size:
                        logger.warning(
                            f"Audio file to {request.endpoint} exceeds custom limit: {file_size} bytes"
                        )
                        return jsonify({
                            'error': 'Audio file too large',
                            'max_size': max_audio_size,
                            'your_size': file_size,
                            'max_size_mb': round(max_audio_size / 1024 / 1024, 1)
                        }), 413

            return f(*args, **kwargs)
        return wrapper
    return decorator
class StreamSizeLimiter:
    """
    Helper class to limit streaming request sizes.

    Wraps a file-like object and counts the bytes handed out by ``read`` and
    ``readline``. As soon as the running total exceeds ``max_size`` a
    ``ValueError`` is raised. The limit is enforced on the data actually
    returned by the underlying stream, not on the size the caller asked for,
    so requesting a large buffer from a small stream is fine.
    """

    def __init__(self, stream, max_size):
        """
        Args:
            stream: Object providing ``read(size)`` and ``readline(size)``.
            max_size: Maximum total number of bytes that may be read.
        """
        self.stream = stream
        self.max_size = max_size
        self.bytes_read = 0

    def _budget(self, size):
        """Return how many bytes to request from the wrapped stream.

        At most one byte beyond the remaining allowance is requested. That
        extra byte is what distinguishes "the stream ended exactly at the
        limit" from "the stream is larger than the limit" without reading an
        unbounded amount.
        """
        allowance = max(self.max_size - self.bytes_read, 0) + 1
        if size is None or size < 0:
            return allowance
        return min(size, allowance)

    def _account(self, data):
        """Add ``data`` to the running total and enforce the limit."""
        self.bytes_read += len(data)
        if self.bytes_read > self.max_size:
            raise ValueError(f"Stream size exceeds limit of {self.max_size} bytes")
        return data

    def read(self, size=-1):
        """
        Read from stream with size limit enforcement.

        Args:
            size: Maximum number of bytes to return; ``-1`` or ``None`` means
                read whatever remains.

        Returns:
            The data returned by the wrapped stream.

        Raises:
            ValueError: If the total number of bytes read exceeds ``max_size``.
        """
        return self._account(self.stream.read(self._budget(size)))

    def readline(self, size=-1):
        """
        Read line from stream with size limit enforcement.

        Args:
            size: Maximum number of bytes to return; ``-1`` or ``None`` means
                no caller-imposed cap on the line length.

        Returns:
            The line returned by the wrapped stream.

        Raises:
            ValueError: If the total number of bytes read exceeds ``max_size``.
        """
        return self._account(self.stream.readline(self._budget(size)))
# Utility functions
def get_request_size():
    """
    Get the size of the current request.

    Returns:
        The declared ``Content-Length`` when the header is present. Otherwise
        the body is read and its length returned. If reading the body fails,
        the failure is logged and 0 is returned.
    """
    declared_length = request.content_length
    if declared_length is not None:
        return declared_length

    # For chunked requests, read and measure
    try:
        return len(request.get_data())
    except Exception as exc:
        # Best-effort helper: keep returning 0 on failure, but leave a trace
        # so a read error is not mistaken for a genuinely empty request.
        logger.warning("Could not determine request size: %s", exc)
        return 0
def format_size(size_bytes):
    """
    Format size in human-readable format.

    The value is divided by 1024 until it drops below 1024 or the TB unit is
    reached, then rendered with one decimal place, e.g. ``1536`` becomes
    ``"1.5 KB"``. Negative values are scaled by magnitude and keep their
    sign, e.g. ``-2048`` becomes ``"-2.0 KB"``.

    Args:
        size_bytes: Size in bytes (int or float).

    Returns:
        A string of the form ``"<number> <unit>"`` with unit B, KB, MB, GB
        or TB.
    """
    sign = '-' if size_bytes < 0 else ''
    magnitude = abs(size_bytes)
    for unit in ('B', 'KB', 'MB', 'GB'):
        if magnitude < 1024.0:
            return f"{sign}{magnitude:.1f} {unit}"
        magnitude /= 1024.0
    return f"{sign}{magnitude:.1f} TB"
# Configuration helper
def configure_size_limits(app, **kwargs):
    """
    Configure size limits for the application.

    Every key of ``DEFAULT_LIMITS`` may be overridden; omitted keys keep their
    default. Keyword arguments that are not known limit names are ignored,
    with a warning logged so that typos do not go unnoticed.

    Args:
        app: Flask application
        max_content_length: Global maximum request size
        max_audio_size: Maximum audio file size
        max_json_size: Maximum JSON payload size
        max_image_size: Maximum image file size
        max_chunk_size: Chunk size for streaming

    Returns:
        The ``RequestSizeLimiter`` created for ``app``.
    """
    unknown = sorted(set(kwargs) - set(DEFAULT_LIMITS))
    if unknown:
        logger.warning("Ignoring unknown size limit options: %s", unknown)

    config = {
        name: kwargs.get(name, default)
        for name, default in DEFAULT_LIMITS.items()
    }
    return RequestSizeLimiter(app, config)