# Request size limiting middleware for preventing memory exhaustion
import logging
import os
from functools import wraps

from flask import request, jsonify, current_app

logger = logging.getLogger(__name__)

# Default size limits (in bytes)
DEFAULT_LIMITS = {
    'max_content_length': 50 * 1024 * 1024,  # 50MB global max
    'max_audio_size': 25 * 1024 * 1024,  # 25MB for audio files
    'max_json_size': 1 * 1024 * 1024,  # 1MB for JSON payloads
    'max_image_size': 10 * 1024 * 1024,  # 10MB for images
    'max_chunk_size': 1 * 1024 * 1024,  # 1MB chunks for streaming
}

# File extensions (lower-case, with leading dot) used to classify uploads
AUDIO_EXTENSIONS = {'.wav', '.mp3', '.ogg', '.webm', '.m4a', '.flac', '.aac'}
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}


def _uploaded_file_size(file_obj):
    """Return the size in bytes of a seekable uploaded file.

    The file is rewound to offset 0 afterwards so the view function can
    read it from the beginning.
    """
    file_obj.seek(0, os.SEEK_END)
    size = file_obj.tell()
    file_obj.seek(0)
    return size


def _too_large_response(error, max_size, your_size, **extra):
    """Build the ``(json_body, 413)`` tuple returned for rejected requests.

    ``extra`` keys (for example ``max_size_mb``) are appended after the
    three standard keys.
    """
    payload = {'error': error, 'max_size': max_size, 'your_size': your_size}
    payload.update(extra)
    return jsonify(payload), 413


class RequestSizeLimiter:
    """
    Middleware to enforce request size limits and prevent memory exhaustion

    ``config`` may override any key of ``DEFAULT_LIMITS``; the merged result
    is stored in ``self.limits``.
    """

    def __init__(self, app=None, config=None):
        self.config = config or {}
        self.limits = {**DEFAULT_LIMITS, **self.config}

        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Initialize the Flask application with size limiting"""
        # Set Flask's MAX_CONTENT_LENGTH
        app.config['MAX_CONTENT_LENGTH'] = self.limits['max_content_length']

        # Store limiter in app
        app.request_size_limiter = self

        # Add before_request handler
        app.before_request(self.check_request_size)

        # Add error handler for 413 Request Entity Too Large
        app.register_error_handler(413, self.handle_413)

        logger.info(
            "Request size limiter initialized with max content length: %.1fMB",
            self.limits['max_content_length'] / 1024 / 1024,
        )

    def check_request_size(self):
        """Check request size before processing.

        Returns:
            ``None`` to let the request proceed, or a ``(response, 413)``
            tuple when the declared body size or an uploaded file exceeds
            one of the configured limits.
        """
        # Skip size check for GET, HEAD, OPTIONS
        if request.method in ('GET', 'HEAD', 'OPTIONS'):
            return None

        content_length = request.content_length
        if content_length is None:
            # No Content-Length header: nothing to compare against here.
            # Chunked bodies are only logged; the header may list several
            # codings and its value is case-insensitive.
            transfer_encoding = request.headers.get('Transfer-Encoding', '')
            if 'chunked' in transfer_encoding.lower():
                logger.warning(
                    "Chunked request from %s to %s",
                    request.remote_addr, request.endpoint,
                )
            return None

        # Check against global limit
        global_limit = self.limits['max_content_length']
        if content_length > global_limit:
            logger.warning(
                "Request from %s exceeds global limit: %s bytes",
                request.remote_addr, content_length,
            )
            return _too_large_response(
                'Request too large', global_limit, content_length
            )

        # Check endpoint-specific limits
        endpoint = request.endpoint
        if endpoint:
            endpoint_limit = self.get_endpoint_limit(endpoint)
            if endpoint_limit and content_length > endpoint_limit:
                logger.warning(
                    "Request from %s to %s exceeds endpoint limit: %s bytes",
                    request.remote_addr, endpoint, content_length,
                )
                return _too_large_response(
                    f'Request too large for {endpoint}',
                    endpoint_limit,
                    content_length,
                )

        # Check file-specific limits
        if request.files:
            rejection = self._check_uploaded_files()
            if rejection is not None:
                return rejection

        # Check JSON payload size.  The declared Content-Length is used so
        # that the body does not have to be buffered just to be measured.
        if request.is_json:
            max_json = self.limits.get(
                'max_json_size', self.limits['max_content_length']
            )
            if content_length > max_json:
                logger.warning(
                    "JSON payload from %s exceeds limit: %s bytes",
                    request.remote_addr, content_length,
                )
                return _too_large_response(
                    'JSON payload too large',
                    max_json,
                    content_length,
                    max_size_kb=round(max_json / 1024, 1),
                )

        return None

    def _file_type_limit(self, file_ext):
        """Return ``(label, max_size)`` for a known extension, else ``None``."""
        fallback = self.limits['max_content_length']
        if file_ext in AUDIO_EXTENSIONS:
            return 'Audio', self.limits.get('max_audio_size', fallback)
        if file_ext in IMAGE_EXTENSIONS:
            return 'Image', self.limits.get('max_image_size', fallback)
        return None

    def _check_uploaded_files(self):
        """Apply per-type limits to every uploaded file.

        Returns a 413 response tuple for the first offending file, or
        ``None`` when all files are within their limits.
        """
        # multi=True: iterate every file, including several uploaded under
        # the same field name (plain items()/values() yield only the first).
        for _field, file_obj in request.files.items(multi=True):
            filename = file_obj.filename or ''
            file_ext = os.path.splitext(filename)[1].lower()

            type_limit = self._file_type_limit(file_ext)
            if type_limit is None:
                continue
            label, max_size = type_limit

            file_size = _uploaded_file_size(file_obj)
            if file_size > max_size:
                logger.warning(
                    "%s file from %s exceeds limit: %s bytes",
                    label, request.remote_addr, file_size,
                )
                return _too_large_response(
                    f'{label} file too large',
                    max_size,
                    file_size,
                    max_size_mb=round(max_size / 1024 / 1024, 1),
                )
        return None

    def get_endpoint_limit(self, endpoint):
        """Get size limit for specific endpoint (``None`` if not listed)"""
        endpoint_limits = {
            'transcribe': self.limits.get('max_audio_size', 25 * 1024 * 1024),
            'speak': self.limits.get('max_json_size', 1 * 1024 * 1024),
            'translate': self.limits.get('max_json_size', 1 * 1024 * 1024),
            'translate_stream': self.limits.get('max_json_size', 1 * 1024 * 1024),
        }
        return endpoint_limits.get(endpoint)

    def handle_413(self, error):
        """Handle 413 Request Entity Too Large errors"""
        logger.warning("413 error from %s: %s", request.remote_addr, error)
        return jsonify({
            'error': 'Request entity too large',
            'message': 'The request payload is too large. Please reduce the size and try again.',
            'max_size': self.limits['max_content_length'],
            'max_size_mb': round(self.limits['max_content_length'] / 1024 / 1024, 1)
        }), 413

    def update_limits(self, **kwargs):
        """Update size limits dynamically.

        Returns:
            A copy of the limits as they were before the update.
        """
        old_limits = self.limits.copy()
        self.limits.update(kwargs)

        # Update Flask's MAX_CONTENT_LENGTH if changed
        if 'max_content_length' in kwargs and current_app:
            current_app.config['MAX_CONTENT_LENGTH'] = kwargs['max_content_length']

        logger.info("Updated size limits: %s", kwargs)
        return old_limits


def limit_request_size(**limit_kwargs):
    """
    Decorator to apply custom size limits to specific routes

    Recognised keyword arguments:
        max_size: maximum Content-Length in bytes (defaults to
            ``DEFAULT_LIMITS['max_content_length']``).
        max_audio_size: maximum size in bytes of each uploaded audio file.

    Usage:
        @app.route('/upload')
        @limit_request_size(max_size=10*1024*1024)  # 10MB limit
        def upload():
            ...
    """
    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            # Check content length
            content_length = request.content_length
            max_size = limit_kwargs.get('max_size', DEFAULT_LIMITS['max_content_length'])

            if content_length and content_length > max_size:
                logger.warning(
                    "Request to %s exceeds custom limit: %s bytes",
                    request.endpoint, content_length,
                )
                return _too_large_response(
                    'Request too large',
                    max_size,
                    content_length,
                    max_size_mb=round(max_size / 1024 / 1024, 1),
                )

            # Check specific file types if specified
            max_audio = limit_kwargs.get('max_audio_size')
            if max_audio is not None and request.files:
                # multi=True so files sharing a field name are all checked.
                for _field, file_obj in request.files.items(multi=True):
                    if not file_obj.filename:
                        continue
                    ext = os.path.splitext(file_obj.filename)[1].lower()
                    if ext not in AUDIO_EXTENSIONS:
                        continue
                    file_size = _uploaded_file_size(file_obj)
                    if file_size > max_audio:
                        return _too_large_response(
                            'Audio file too large',
                            max_audio,
                            file_size,
                            max_size_mb=round(max_audio / 1024 / 1024, 1),
                        )

            return f(*args, **kwargs)
        return wrapper
    return decorator


class StreamSizeLimiter:
    """
    Helper class to limit streaming request sizes

    Wraps a file-like ``stream`` and raises ``ValueError`` as soon as more
    than ``max_size`` bytes have been read through the wrapper.  The limit
    is enforced on bytes actually read, not on the size a caller asks for,
    so ``read(8192)`` on a short stream succeeds.
    """

    def __init__(self, stream, max_size):
        self.stream = stream
        self.max_size = max_size
        self.bytes_read = 0

    def _request_size(self, size):
        """Translate a caller-supplied size into a bounded read size."""
        # Ask for one byte more than the remaining allowance so that an
        # oversized stream is detected instead of being silently truncated.
        allowance = max(self.max_size - self.bytes_read + 1, 0)
        if size is None or size < 0:
            return allowance
        return min(size, allowance)

    def _account(self, data):
        """Record ``data`` as read and enforce the limit."""
        self.bytes_read += len(data)
        if self.bytes_read > self.max_size:
            raise ValueError(f"Stream size exceeds limit of {self.max_size} bytes")
        return data

    def read(self, size=-1):
        """Read from stream with size limit enforcement.

        Args:
            size: maximum number of bytes to read; ``None`` or a negative
                value reads everything that remains.

        Raises:
            ValueError: if the total read so far exceeds ``max_size``.
        """
        return self._account(self.stream.read(self._request_size(size)))

    def readline(self, size=-1):
        """Read line from stream with size limit enforcement.

        Accepts the same ``size`` values and raises the same ``ValueError``
        as :meth:`read`.
        """
        return self._account(self.stream.readline(self._request_size(size)))


# Utility functions
def get_request_size():
    """Get the size of the current request.

    Uses the Content-Length header when present and non-zero; otherwise
    reads the body and measures it.  Returns 0 if the body cannot be read.
    """
    if request.content_length:
        return request.content_length

    # For chunked requests, read and measure
    try:
        return len(request.get_data())
    except Exception as exc:  # best effort: never fail the caller
        logger.warning("Could not measure request body: %s", exc)
        return 0


def format_size(size_bytes):
    """Format size in human-readable format (1024-based, one decimal)"""
    remaining = size_bytes
    for unit in ('B', 'KB', 'MB', 'GB'):
        if remaining < 1024.0:
            return f"{remaining:.1f} {unit}"
        remaining /= 1024.0
    return f"{remaining:.1f} TB"


# Configuration helper
def configure_size_limits(app, **kwargs):
    """
    Configure size limits for the application

    Args:
        app: Flask application
        max_content_length: Global maximum request size
        max_audio_size: Maximum audio file size
        max_json_size: Maximum JSON payload size
        max_image_size: Maximum image file size
        max_chunk_size: Maximum chunk size for streaming

    Returns:
        The ``RequestSizeLimiter`` registered on ``app``.  Keyword arguments
        that are not keys of ``DEFAULT_LIMITS`` are ignored.
    """
    config = {
        key: kwargs.get(key, default)
        for key, default in DEFAULT_LIMITS.items()
    }
    return RequestSizeLimiter(app, config)