Fix potential memory leaks in audio handling that can crash the server after extended use

This fix addresses memory leaks in both the backend and the frontend that could cause server crashes after extended use.

Backend fixes:
- MemoryManager class monitors process and GPU memory usage
- Automatic cleanup when thresholds are exceeded (4 GB process, 2 GB GPU)
- Whisper model reloading to clear GPU memory fragmentation
- Aggressive temporary file cleanup based on age
- Context manager for audio processing with guaranteed cleanup (see the sketch after this list)
- Integration with session manager for resource tracking
- Background monitoring thread runs every 30 seconds

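memory_manager.py is among the changed files but is not included in this excerpt. Below is a rough sketch of the context-manager pattern described above: the constructor arguments and `add_temp_file()` are taken from the app.py hunk further down, while everything else is an assumption about the real implementation.

```python
# Minimal sketch of the AudioProcessingContext pattern (not the actual memory_manager.py).
# Only the constructor shape and add_temp_file() are confirmed by the app.py diff below.
import gc
import logging
import os

logger = logging.getLogger(__name__)

class AudioProcessingContext:
    """Track temp files created while handling one audio request and
    guarantee cleanup even if transcription raises."""

    def __init__(self, memory_manager, name='audio'):
        self.memory_manager = memory_manager
        self.name = name
        self.temp_files = []

    def add_temp_file(self, path):
        # Register a temp file so it is removed when the context exits
        self.temp_files.append(path)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        for path in self.temp_files:
            try:
                if os.path.exists(path):
                    os.remove(path)
            except OSError as e:
                logger.error(f"Failed to remove temp file {path}: {e}")
        gc.collect()  # release Python-side buffers promptly
        return False  # never swallow exceptions from the request handler
```

The guarantee comes from `__exit__`, which runs whether the request succeeds or raises, so anything registered via `ctx.add_temp_file()` is always removed.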
Frontend fixes:
- MemoryManager singleton tracks all browser resources
- SafeMediaRecorder wrapper ensures stream cleanup
- AudioBlobHandler manages blob lifecycle and object URLs
- Automatic cleanup of closed AudioContexts
- Proper MediaStream track stopping
- Periodic cleanup of orphaned resources
- Cleanup on page unload

Admin features:
- GET /admin/memory - View memory statistics
- POST /admin/memory/cleanup - Trigger manual cleanup
- Real-time metrics including GPU usage and temp files
- Model reload tracking

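The two admin endpoints can be exercised as below. The host and port come from the dev-server invocation at the bottom of app.py; 'default-admin-token' is only the fallback value in this diff, not a token to use in production.

```python
# Hedged usage sketch for the new admin endpoints added in app.py.
import requests

BASE = "http://localhost:5005"          # app.run(..., port=5005) in this diff
HEADERS = {"X-Admin-Token": "default-admin-token"}  # replace with your ADMIN_TOKEN

# View current memory statistics
stats = requests.get(f"{BASE}/admin/memory", headers=HEADERS, timeout=10)
print(stats.json())

# Trigger a manual, aggressive cleanup and report how much was freed
result = requests.post(f"{BASE}/admin/memory/cleanup", headers=HEADERS, timeout=30)
print(result.json().get("freed"))
```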
Key improvements:
- AudioContext properly closed after use
- Object URLs revoked after use
- MediaRecorder streams properly stopped
- Audio chunks cleared after processing
- GPU cache cleared after each transcription
- Temp files tracked and cleaned aggressively

This prevents the gradual memory increase that could lead to out-of-memory errors or performance degradation after hours of use.
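The 30-second background monitor that catches this gradual growth lives in memory_manager.py, which is not shown in this excerpt. A minimal sketch of what that loop plausibly does follows; psutil for process RSS and torch.cuda.memory_allocated() for GPU usage are assumptions about how the real class measures memory, while cleanup_memory(aggressive=True) mirrors the call seen in the admin route below.

```python
# Hedged sketch of the periodic monitoring loop described above.
import threading
import time

import psutil
import torch

def monitor_memory(memory_manager, process_limit_mb=4096, gpu_limit_mb=2048, interval=30):
    process = psutil.Process()
    while True:
        rss_mb = process.memory_info().rss / (1024 * 1024)
        gpu_mb = 0.0
        if torch.cuda.is_available():
            gpu_mb = torch.cuda.memory_allocated() / (1024 * 1024)
        if rss_mb > process_limit_mb or gpu_mb > gpu_limit_mb:
            # Over threshold: run the aggressive path (temp files, GPU cache, model reload)
            memory_manager.cleanup_memory(aggressive=True)
        time.sleep(interval)

def start_monitor(memory_manager):
    # Daemon thread so it never blocks interpreter shutdown
    t = threading.Thread(target=monitor_memory, args=(memory_manager,), daemon=True)
    t.start()
    return t
```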

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit 1b9ad03400
parent 92b7c41f61
Date: 2025-06-03 08:37:13 -06:00
7 changed files with 1194 additions and 93 deletions

app.py (129 changed lines)

@@ -38,6 +38,7 @@ from secrets_manager import init_app as init_secrets
 from session_manager import init_app as init_session_manager, track_resource
 from request_size_limiter import RequestSizeLimiter, limit_request_size
 from error_logger import ErrorLogger, log_errors, log_performance, log_exception, get_logger
+from memory_manager import MemoryManager, AudioProcessingContext, with_memory_management
 
 # Error boundary decorator for Flask routes
 def with_error_boundary(func):
@@ -152,6 +153,13 @@ error_logger = ErrorLogger(app, {
 # Update logger to use the new system
 logger = get_logger(__name__)
 
+# Initialize memory management
+memory_manager = MemoryManager(app, {
+    'memory_threshold_mb': app.config.get('MEMORY_THRESHOLD_MB', 4096),
+    'gpu_memory_threshold_mb': app.config.get('GPU_MEMORY_THRESHOLD_MB', 2048),
+    'cleanup_interval': app.config.get('MEMORY_CLEANUP_INTERVAL', 30)
+})
+
 # TTS configuration is already loaded from config.py
 # Warn if TTS API key is not set
 if not app.config.get('TTS_API_KEY'):
@@ -589,6 +597,10 @@ else:
     whisper_model.eval()
     logger.info("Whisper model loaded (CPU mode)")
 
+# Register model with memory manager
+memory_manager.set_whisper_model(whisper_model)
+app.whisper_model = whisper_model
+
 # Supported languages
 SUPPORTED_LANGUAGES = {
     "ar": "Arabic",
@@ -638,27 +650,35 @@ def index():
 @with_error_boundary
 @track_resource('audio_file')
 @log_performance('transcribe_audio')
+@with_memory_management
 def transcribe():
-    if 'audio' not in request.files:
-        return jsonify({'error': 'No audio file provided'}), 400
+    # Use memory management context
+    with AudioProcessingContext(app.memory_manager, name='transcribe') as ctx:
+        if 'audio' not in request.files:
+            return jsonify({'error': 'No audio file provided'}), 400
 
-    audio_file = request.files['audio']
-    # Validate audio file
-    valid, error_msg = Validators.validate_audio_file(audio_file)
-    if not valid:
-        return jsonify({'error': error_msg}), 400
-    # Validate and sanitize language code
-    source_lang = request.form.get('source_lang', '')
-    allowed_languages = set(SUPPORTED_LANGUAGES.values())
-    source_lang = Validators.validate_language_code(source_lang, allowed_languages) or ''
+        audio_file = request.files['audio']
+        # Validate audio file
+        valid, error_msg = Validators.validate_audio_file(audio_file)
+        if not valid:
+            return jsonify({'error': error_msg}), 400
+        # Validate and sanitize language code
+        source_lang = request.form.get('source_lang', '')
+        allowed_languages = set(SUPPORTED_LANGUAGES.values())
+        source_lang = Validators.validate_language_code(source_lang, allowed_languages) or ''
 
-    # Save the audio file temporarily with unique name
-    temp_filename = f'input_audio_{int(time.time() * 1000)}.wav'
-    temp_path = os.path.join(app.config['UPLOAD_FOLDER'], temp_filename)
-    audio_file.save(temp_path)
-    register_temp_file(temp_path)
+        # Save the audio file temporarily with unique name
+        temp_filename = f'input_audio_{int(time.time() * 1000)}.wav'
+        temp_path = os.path.join(app.config['UPLOAD_FOLDER'], temp_filename)
+        # Ensure file handle is properly closed
+        with open(temp_path, 'wb') as f:
+            audio_file.save(f)
+        register_temp_file(temp_path)
+        ctx.add_temp_file(temp_path)  # Register with context for cleanup
 
     # Add to session resources
     if hasattr(g, 'session_manager') and hasattr(g, 'user_session'):
@@ -741,12 +761,17 @@ def transcribe():
         return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
     finally:
         # Clean up the temporary file
-        if os.path.exists(temp_path):
-            os.remove(temp_path)
+        try:
+            if 'temp_path' in locals() and os.path.exists(temp_path):
+                os.remove(temp_path)
+                temp_file_registry.pop(temp_path, None)
+        except Exception as e:
+            logger.error(f"Failed to clean up temp file: {e}")
 
         # Force garbage collection to free memory
         if device.type == 'cuda':
             torch.cuda.empty_cache()
+            torch.cuda.synchronize()  # Ensure all CUDA operations are complete
         gc.collect()
 
 @app.route('/translate', methods=['POST'])
@@ -1797,5 +1822,69 @@ def get_security_logs():
         logger.error(f"Failed to get security logs: {str(e)}")
         return jsonify({'error': str(e)}), 500
 
+@app.route('/admin/memory', methods=['GET'])
+@rate_limit(requests_per_minute=10)
+def get_memory_stats():
+    """Get memory usage statistics"""
+    try:
+        # Simple authentication check
+        auth_token = request.headers.get('X-Admin-Token')
+        expected_token = app.config.get('ADMIN_TOKEN', 'default-admin-token')
+        if auth_token != expected_token:
+            return jsonify({'error': 'Unauthorized'}), 401
+
+        if hasattr(app, 'memory_manager'):
+            metrics = app.memory_manager.get_metrics()
+            return jsonify(metrics)
+        else:
+            return jsonify({'error': 'Memory manager not initialized'}), 500
+    except Exception as e:
+        logger.error(f"Failed to get memory stats: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/admin/memory/cleanup', methods=['POST'])
+@rate_limit(requests_per_minute=5)
+def trigger_memory_cleanup():
+    """Manually trigger memory cleanup"""
+    try:
+        # Simple authentication check
+        auth_token = request.headers.get('X-Admin-Token')
+        expected_token = app.config.get('ADMIN_TOKEN', 'default-admin-token')
+        if auth_token != expected_token:
+            return jsonify({'error': 'Unauthorized'}), 401
+
+        if hasattr(app, 'memory_manager'):
+            # Get stats before cleanup
+            before_stats = app.memory_manager.get_memory_stats()
+
+            # Perform aggressive cleanup
+            app.memory_manager.cleanup_memory(aggressive=True)
+
+            # Get stats after cleanup
+            after_stats = app.memory_manager.get_memory_stats()
+
+            return jsonify({
+                'success': True,
+                'before': {
+                    'process_mb': before_stats.process_memory_mb,
+                    'gpu_mb': before_stats.gpu_memory_mb
+                },
+                'after': {
+                    'process_mb': after_stats.process_memory_mb,
+                    'gpu_mb': after_stats.gpu_memory_mb
+                },
+                'freed': {
+                    'process_mb': before_stats.process_memory_mb - after_stats.process_memory_mb,
+                    'gpu_mb': before_stats.gpu_memory_mb - after_stats.gpu_memory_mb
+                }
+            })
+        else:
+            return jsonify({'error': 'Memory manager not initialized'}), 500
+    except Exception as e:
+        logger.error(f"Failed to trigger memory cleanup: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5005, debug=True)