This comprehensive request size limiting system prevents memory exhaustion and DoS attacks from oversized requests.

Key features:
- Global request size limit: 50MB (configurable)
- Type-specific limits: 25MB for audio, 1MB for JSON, 10MB for images
- Multi-layer validation before loading data into memory
- File type detection based on extensions
- Endpoint-specific limit enforcement
- Dynamic configuration via admin API
- Clear error messages with size information

Implementation details:
- RequestSizeLimiter middleware with Flask integration
- Pre-request validation using Content-Length header
- File size checking for multipart uploads
- JSON payload size validation
- Custom decorator for route-specific limits
- StreamSizeLimiter for chunked transfers
- Integration with Flask's MAX_CONTENT_LENGTH

Admin features:
- GET /admin/size-limits - View current limits
- POST /admin/size-limits - Update limits dynamically
- Human-readable size formatting in responses
- Size limit info in health check endpoints

Security benefits:
- Prevents memory exhaustion attacks
- Blocks oversized uploads before processing
- Protects against buffer overflow attempts
- Works with rate limiting for comprehensive protection

This addresses the critical security issue of unbounded request sizes that could lead to memory exhaustion or system crashes.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
302 lines
12 KiB
Python
302 lines
12 KiB
Python
# Request size limiting middleware for preventing memory exhaustion
|
|
import logging
|
|
from functools import wraps
|
|
from flask import request, jsonify, current_app
|
|
import os
|
|
|
|
logger = logging.getLogger(__name__)

# Built-in size ceilings, all expressed in bytes. Values supplied through a
# limiter's config are layered on top of these.
DEFAULT_LIMITS = dict(
    max_content_length=50 * 1024 ** 2,  # whole request: 50MB
    max_audio_size=25 * 1024 ** 2,      # uploaded audio file: 25MB
    max_json_size=1 * 1024 ** 2,        # JSON body: 1MB
    max_image_size=10 * 1024 ** 2,      # uploaded image file: 10MB
    max_chunk_size=1 * 1024 ** 2,       # streaming chunk: 1MB
)

# Lower-cased filename extensions (leading dot included) that classify an
# upload as audio or as an image when choosing a type-specific limit.
AUDIO_EXTENSIONS = {
    '.aac',
    '.flac',
    '.m4a',
    '.mp3',
    '.ogg',
    '.wav',
    '.webm',
}
IMAGE_EXTENSIONS = {
    '.bmp',
    '.gif',
    '.jpeg',
    '.jpg',
    '.png',
    '.webp',
}
|
|
|
|
class RequestSizeLimiter:
    """Flask middleware that rejects oversized requests with HTTP 413.

    Limits are kept in ``self.limits`` (bytes), built from ``DEFAULT_LIMITS``
    overlaid with the ``config`` mapping passed to the constructor. Once
    attached to an app via ``init_app`` the limiter runs
    ``check_request_size`` before every request.
    """

    def __init__(self, app=None, config=None):
        """Create the limiter and optionally attach it to ``app``.

        Args:
            app: Flask application to attach to immediately, or None to
                call ``init_app`` later.
            config: Mapping of limit name -> size in bytes overriding
                ``DEFAULT_LIMITS``.
        """
        self.config = config or {}
        self.limits = {**DEFAULT_LIMITS, **self.config}

        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Initialize the Flask application with size limiting.

        Sets ``MAX_CONTENT_LENGTH`` on the app config, stores the limiter on
        the app as ``request_size_limiter``, and registers the
        before-request check and the 413 error handler.
        """
        app.config['MAX_CONTENT_LENGTH'] = self.limits['max_content_length']
        app.request_size_limiter = self
        app.before_request(self.check_request_size)
        app.register_error_handler(413, self.handle_413)

        logger.info(f"Request size limiter initialized with max content length: {self.limits['max_content_length'] / 1024 / 1024:.1f}MB")

    def check_request_size(self):
        """Validate the current request against the configured limits.

        Checks, in order: the global limit, the endpoint-specific limit,
        per-file limits for audio/image uploads, and the JSON payload limit.

        Returns:
            None when the request may proceed, otherwise a
            ``(json_response, 413)`` tuple describing the violated limit.
        """
        # Skip size check for GET, HEAD, OPTIONS
        if request.method in ('GET', 'HEAD', 'OPTIONS'):
            return None

        content_length = request.content_length

        if content_length is None:
            # Without a Content-Length there is nothing to compare here.
            # Chunked uploads are only logged; they are not size-checked by
            # this method. Transfer-coding names are case-insensitive and
            # the header may list several codings, hence the lower-cased
            # substring test instead of an exact match.
            transfer_encoding = request.headers.get('Transfer-Encoding') or ''
            if 'chunked' in transfer_encoding.lower():
                logger.warning(f"Chunked request from {request.remote_addr} to {request.endpoint}")
            return None

        # Check against global limit
        if content_length > self.limits['max_content_length']:
            logger.warning(f"Request from {request.remote_addr} exceeds global limit: {content_length} bytes")
            return jsonify({
                'error': 'Request too large',
                'max_size': self.limits['max_content_length'],
                'your_size': content_length
            }), 413

        # Check endpoint-specific limits
        endpoint = request.endpoint
        if endpoint:
            endpoint_limit = self.get_endpoint_limit(endpoint)
            if endpoint_limit and content_length > endpoint_limit:
                logger.warning(f"Request from {request.remote_addr} to {endpoint} exceeds endpoint limit: {content_length} bytes")
                return jsonify({
                    'error': f'Request too large for {endpoint}',
                    'max_size': endpoint_limit,
                    'your_size': content_length
                }), 413

        # Check file-specific limits. items(multi=True) visits every
        # uploaded file; plain items() yields only the first file for each
        # field name, which would let additional files sent under a repeated
        # field name skip the check.
        if request.files:
            for _field, file_obj in request.files.items(multi=True):
                rejection = self._check_file_limit(file_obj)
                if rejection is not None:
                    return rejection

        # Check JSON payload size. Content-Length is already known at this
        # point, so compare it directly rather than reading the whole body
        # into memory with get_data() just to measure it.
        if request.is_json:
            max_json = self.limits.get('max_json_size', self.limits['max_content_length'])
            if content_length > max_json:
                logger.warning(f"JSON payload from {request.remote_addr} exceeds limit: {content_length} bytes")
                return jsonify({
                    'error': 'JSON payload too large',
                    'max_size': max_json,
                    'your_size': content_length,
                    'max_size_kb': round(max_json / 1024, 1)
                }), 413

        return None

    def _check_file_limit(self, file_obj):
        """Apply the audio/image limit matching one uploaded file.

        The file type is decided from the lower-cased filename extension.
        Files whose extension is in neither ``AUDIO_EXTENSIONS`` nor
        ``IMAGE_EXTENSIONS`` are not limited here.

        Returns:
            None when the file is acceptable, otherwise a
            ``(json_response, 413)`` tuple.
        """
        filename = file_obj.filename or ''
        file_ext = os.path.splitext(filename)[1].lower()

        if file_ext in AUDIO_EXTENSIONS:
            label, limit_key = 'Audio', 'max_audio_size'
        elif file_ext in IMAGE_EXTENSIONS:
            label, limit_key = 'Image', 'max_image_size'
        else:
            return None

        max_size = self.limits.get(limit_key, self.limits['max_content_length'])

        # Measure by seeking to the end, then rewind so the view function
        # reads the upload from the start.
        file_obj.seek(0, os.SEEK_END)
        file_size = file_obj.tell()
        file_obj.seek(0)

        if file_size <= max_size:
            return None

        logger.warning(f"{label} file from {request.remote_addr} exceeds limit: {file_size} bytes")
        return jsonify({
            'error': f'{label} file too large',
            'max_size': max_size,
            'your_size': file_size,
            'max_size_mb': round(max_size / 1024 / 1024, 1)
        }), 413

    def get_endpoint_limit(self, endpoint):
        """Get size limit for specific endpoint.

        Returns:
            The limit in bytes for the known endpoints ``transcribe``,
            ``speak``, ``translate`` and ``translate_stream``; None for any
            other endpoint name.
        """
        endpoint_limits = {
            'transcribe': self.limits.get('max_audio_size', 25 * 1024 * 1024),
            'speak': self.limits.get('max_json_size', 1 * 1024 * 1024),
            'translate': self.limits.get('max_json_size', 1 * 1024 * 1024),
            'translate_stream': self.limits.get('max_json_size', 1 * 1024 * 1024),
        }
        return endpoint_limits.get(endpoint)

    def handle_413(self, error):
        """Handle 413 Request Entity Too Large errors.

        Logs the error and returns a JSON body reporting the global limit in
        bytes and megabytes, with status 413.
        """
        logger.warning(f"413 error from {request.remote_addr}: {error}")
        return jsonify({
            'error': 'Request entity too large',
            'message': 'The request payload is too large. Please reduce the size and try again.',
            'max_size': self.limits['max_content_length'],
            'max_size_mb': round(self.limits['max_content_length'] / 1024 / 1024, 1)
        }), 413

    def update_limits(self, **kwargs):
        """Update size limits dynamically.

        Args:
            **kwargs: Limit name -> new size in bytes; merged into
                ``self.limits``.

        Returns:
            A copy of the limits as they were before the update.
        """
        old_limits = self.limits.copy()
        self.limits.update(kwargs)

        # Mirror a changed global limit into Flask's MAX_CONTENT_LENGTH, but
        # only when ``current_app`` evaluates truthy.
        if 'max_content_length' in kwargs and current_app:
            current_app.config['MAX_CONTENT_LENGTH'] = kwargs['max_content_length']

        logger.info(f"Updated size limits: {kwargs}")
        return old_limits
|
|
|
|
def limit_request_size(**limit_kwargs):
    """
    Decorator to apply custom size limits to specific routes

    Keyword Args:
        max_size: Maximum Content-Length in bytes. Defaults to
            DEFAULT_LIMITS['max_content_length'].
        max_audio_size: Maximum size in bytes of each uploaded file whose
            extension is in AUDIO_EXTENSIONS. Not checked when omitted.
        max_image_size: Maximum size in bytes of each uploaded file whose
            extension is in IMAGE_EXTENSIONS. Not checked when omitted.

    The wrapped view returns a ``(json_response, 413)`` tuple instead of
    running when a limit is exceeded.

    Usage:
        @app.route('/upload', methods=['POST'])
        @limit_request_size(max_size=10*1024*1024)  # 10MB limit
        def upload():
            ...
    """
    # Resolve which per-file rules are active once, at decoration time.
    file_rules = [
        (label, extensions, limit_kwargs[option])
        for option, label, extensions in (
            ('max_audio_size', 'Audio', AUDIO_EXTENSIONS),
            ('max_image_size', 'Image', IMAGE_EXTENSIONS),
        )
        if option in limit_kwargs
    ]

    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            # Check content length
            content_length = request.content_length
            max_size = limit_kwargs.get('max_size', DEFAULT_LIMITS['max_content_length'])

            if content_length and content_length > max_size:
                logger.warning(f"Request to {request.endpoint} exceeds custom limit: {content_length} bytes")
                return jsonify({
                    'error': 'Request too large',
                    'max_size': max_size,
                    'your_size': content_length,
                    'max_size_mb': round(max_size / 1024 / 1024, 1)
                }), 413

            # Check specific file types if specified. items(multi=True)
            # visits every uploaded file; values() yields only the first
            # file for each field name, so extra files sent under a repeated
            # field name would otherwise go unchecked.
            if file_rules and request.files:
                for _field, file_obj in request.files.items(multi=True):
                    if not file_obj.filename:
                        continue
                    ext = os.path.splitext(file_obj.filename)[1].lower()

                    for label, extensions, type_limit in file_rules:
                        if ext not in extensions:
                            continue

                        # Measure, then rewind so the view reads from the start.
                        file_obj.seek(0, os.SEEK_END)
                        file_size = file_obj.tell()
                        file_obj.seek(0)

                        if file_size > type_limit:
                            return jsonify({
                                'error': f'{label} file too large',
                                'max_size': type_limit,
                                'your_size': file_size,
                                'max_size_mb': round(type_limit / 1024 / 1024, 1)
                            }), 413

            return f(*args, **kwargs)
        return wrapper
    return decorator
|
|
|
|
class StreamSizeLimiter:
    """
    Helper class to limit streaming request sizes

    Wraps a readable stream and counts the bytes handed out through
    ``read`` and ``readline``. As soon as the wrapped stream yields more
    than ``max_size`` bytes in total, ``ValueError`` is raised; every later
    call raises as well.
    """

    def __init__(self, stream, max_size):
        """
        Args:
            stream: Object exposing ``read(size)`` and ``readline(size)``.
            max_size: Maximum total number of bytes that may be read.
        """
        self.stream = stream
        self.max_size = max_size
        self.bytes_read = 0

    def _bounded_size(self, size):
        """Return how many bytes to request from the wrapped stream.

        The request is capped at one byte beyond the remaining budget: that
        single extra byte is enough to detect an oversized stream, and it
        keeps an unbounded read (``size`` of None or negative) from pulling
        arbitrarily much data into memory.
        """
        probe = max(self.max_size - self.bytes_read + 1, 0)
        if size is None or size < 0:
            return probe
        return min(size, probe)

    def _account(self, data):
        """Add ``data`` to the running total and enforce the limit."""
        self.bytes_read += len(data)
        if self.bytes_read > self.max_size:
            raise ValueError(f"Stream size exceeds limit of {self.max_size} bytes")
        return data

    def read(self, size=-1):
        """Read from stream with size limit enforcement.

        Args:
            size: Maximum bytes to read; -1 (or None) reads everything that
                remains, up to the limit.

        Returns:
            The data read from the wrapped stream.

        Raises:
            ValueError: If the total read so far exceeds ``max_size``.
        """
        return self._account(self.stream.read(self._bounded_size(size)))

    def readline(self, size=-1):
        """Read line from stream with size limit enforcement.

        Args:
            size: Maximum bytes to read for this line; -1 (or None) means
                bounded only by the limit.

        Returns:
            The line read from the wrapped stream.

        Raises:
            ValueError: If the total read so far exceeds ``max_size``.
        """
        return self._account(self.stream.readline(self._bounded_size(size)))
|
|
|
|
# Utility functions
|
|
def get_request_size():
    """Get the size of the current request.

    Returns:
        ``request.content_length`` when it is set and non-zero. Otherwise
        the length of the body returned by ``request.get_data()``, or 0 if
        reading the body raises.
    """
    declared = request.content_length
    if declared:
        return declared

    # No usable Content-Length (e.g. a chunked request): read and measure.
    try:
        return len(request.get_data())
    except Exception as exc:
        # Still best-effort, but leave a trace so a failed read is not
        # mistaken for a genuinely empty request.
        logger.warning(f"Could not determine request size: {exc}")
        return 0
|
|
|
|
def format_size(size_bytes):
    """Format size in human-readable format.

    Divides by 1024 per step through B, KB, MB, GB and TB and renders one
    decimal place, e.g. ``1536`` -> ``"1.5 KB"``. Values of 1024 TB or more
    stay expressed in TB. Negative values are scaled by magnitude and keep
    their sign.

    Args:
        size_bytes: Size in bytes (int or float).

    Returns:
        The formatted string, such as ``"50.0 MB"``.
    """
    sign = '-' if size_bytes < 0 else ''
    magnitude = abs(size_bytes)

    for unit in ('B', 'KB', 'MB', 'GB'):
        if magnitude < 1024.0:
            return f"{sign}{magnitude:.1f} {unit}"
        magnitude /= 1024.0
    return f"{sign}{magnitude:.1f} TB"
|
|
|
|
# Configuration helper
|
|
def configure_size_limits(app, **kwargs):
    """
    Configure size limits for the application

    Builds a config holding every key of DEFAULT_LIMITS, taking the value
    from ``kwargs`` when given and the default otherwise, then creates a
    RequestSizeLimiter attached to ``app``.

    Args:
        app: Flask application
        max_content_length: Global maximum request size
        max_audio_size: Maximum audio file size
        max_json_size: Maximum JSON payload size
        max_image_size: Maximum image file size
        max_chunk_size: Maximum chunk size for streaming

    Returns:
        The RequestSizeLimiter instance created for ``app``.
    """
    # Options that match no known limit are ignored, but say so: a mistyped
    # name would otherwise leave the default in force without any hint.
    unknown = sorted(set(kwargs) - set(DEFAULT_LIMITS))
    if unknown:
        logger.warning(f"Ignoring unknown size limit options: {', '.join(unknown)}")

    config = {
        name: kwargs.get(name, default)
        for name, default in DEFAULT_LIMITS.items()
    }

    limiter = RequestSizeLimiter(app, config)
    return limiter