# Session management system for preventing resource leaks import time import uuid import logging from datetime import datetime, timedelta from typing import Dict, Any, Optional, List, Tuple from dataclasses import dataclass, field from threading import Lock, Thread import json import os import tempfile import shutil from collections import defaultdict from functools import wraps from flask import session, request, g, current_app logger = logging.getLogger(__name__) @dataclass class SessionResource: """Represents a resource associated with a session""" resource_id: str resource_type: str # 'audio_file', 'temp_file', 'websocket', 'stream' path: Optional[str] = None created_at: float = field(default_factory=time.time) last_accessed: float = field(default_factory=time.time) size_bytes: int = 0 metadata: Dict[str, Any] = field(default_factory=dict) @dataclass class UserSession: """Represents a user session with associated resources""" session_id: str user_id: Optional[str] = None ip_address: Optional[str] = None user_agent: Optional[str] = None created_at: float = field(default_factory=time.time) last_activity: float = field(default_factory=time.time) resources: Dict[str, SessionResource] = field(default_factory=dict) request_count: int = 0 total_bytes_used: int = 0 active_streams: int = 0 metadata: Dict[str, Any] = field(default_factory=dict) class SessionManager: """ Manages user sessions and associated resources to prevent leaks """ def __init__(self, config: Dict[str, Any] = None): self.config = config or {} self.sessions: Dict[str, UserSession] = {} self.lock = Lock() # Configuration self.max_session_duration = self.config.get('max_session_duration', 3600) # 1 hour self.max_idle_time = self.config.get('max_idle_time', 900) # 15 minutes self.max_resources_per_session = self.config.get('max_resources_per_session', 100) self.max_bytes_per_session = self.config.get('max_bytes_per_session', 100 * 1024 * 1024) # 100MB self.cleanup_interval = self.config.get('cleanup_interval', 60) # 1 minute self.session_storage_path = self.config.get('session_storage_path', os.path.join(tempfile.gettempdir(), 'talk2me_sessions')) # Statistics self.stats = { 'total_sessions_created': 0, 'total_sessions_cleaned': 0, 'total_resources_cleaned': 0, 'total_bytes_cleaned': 0, 'active_sessions': 0, 'active_resources': 0, 'active_bytes': 0 } # Resource cleanup handlers self.cleanup_handlers = { 'audio_file': self._cleanup_audio_file, 'temp_file': self._cleanup_temp_file, 'websocket': self._cleanup_websocket, 'stream': self._cleanup_stream } # Initialize storage self._init_storage() # Start cleanup thread self.cleanup_thread = Thread(target=self._cleanup_loop, daemon=True) self.cleanup_thread.start() logger.info("Session manager initialized") def _init_storage(self): """Initialize session storage directory""" try: os.makedirs(self.session_storage_path, mode=0o755, exist_ok=True) logger.info(f"Session storage initialized at {self.session_storage_path}") except Exception as e: logger.error(f"Failed to create session storage: {e}") def create_session(self, session_id: str = None, user_id: str = None, ip_address: str = None, user_agent: str = None) -> UserSession: """Create a new session""" with self.lock: if not session_id: session_id = str(uuid.uuid4()) if session_id in self.sessions: logger.warning(f"Session {session_id} already exists") return self.sessions[session_id] session = UserSession( session_id=session_id, user_id=user_id, ip_address=ip_address, user_agent=user_agent ) self.sessions[session_id] = session self.stats['total_sessions_created'] += 1 self.stats['active_sessions'] = len(self.sessions) # Create session directory session_dir = os.path.join(self.session_storage_path, session_id) try: os.makedirs(session_dir, mode=0o755, exist_ok=True) except Exception as e: logger.error(f"Failed to create session directory: {e}") logger.info(f"Created session {session_id}") return session def get_session(self, session_id: str) -> Optional[UserSession]: """Get a session by ID""" with self.lock: session = self.sessions.get(session_id) if session: session.last_activity = time.time() return session def add_resource(self, session_id: str, resource_type: str, resource_id: str = None, path: str = None, size_bytes: int = 0, metadata: Dict[str, Any] = None) -> Optional[SessionResource]: """Add a resource to a session""" with self.lock: session = self.sessions.get(session_id) if not session: logger.warning(f"Session {session_id} not found") return None # Check limits if len(session.resources) >= self.max_resources_per_session: logger.warning(f"Session {session_id} reached resource limit") self._cleanup_oldest_resources(session, 1) if session.total_bytes_used + size_bytes > self.max_bytes_per_session: logger.warning(f"Session {session_id} reached size limit") bytes_to_free = (session.total_bytes_used + size_bytes) - self.max_bytes_per_session self._cleanup_resources_by_size(session, bytes_to_free) # Create resource if not resource_id: resource_id = str(uuid.uuid4()) resource = SessionResource( resource_id=resource_id, resource_type=resource_type, path=path, size_bytes=size_bytes, metadata=metadata or {} ) session.resources[resource_id] = resource session.total_bytes_used += size_bytes session.last_activity = time.time() # Update stats self.stats['active_resources'] += 1 self.stats['active_bytes'] += size_bytes logger.debug(f"Added {resource_type} resource {resource_id} to session {session_id}") return resource def remove_resource(self, session_id: str, resource_id: str) -> bool: """Remove a resource from a session""" with self.lock: session = self.sessions.get(session_id) if not session: return False resource = session.resources.get(resource_id) if not resource: return False # Cleanup resource self._cleanup_resource(resource) # Remove from session del session.resources[resource_id] session.total_bytes_used -= resource.size_bytes # Update stats self.stats['active_resources'] -= 1 self.stats['active_bytes'] -= resource.size_bytes self.stats['total_resources_cleaned'] += 1 self.stats['total_bytes_cleaned'] += resource.size_bytes logger.debug(f"Removed resource {resource_id} from session {session_id}") return True def update_session_activity(self, session_id: str): """Update session last activity time""" with self.lock: session = self.sessions.get(session_id) if session: session.last_activity = time.time() session.request_count += 1 def cleanup_session(self, session_id: str) -> bool: """Clean up a session and all its resources""" with self.lock: session = self.sessions.get(session_id) if not session: return False # Cleanup all resources for resource_id in list(session.resources.keys()): self.remove_resource(session_id, resource_id) # Remove session directory session_dir = os.path.join(self.session_storage_path, session_id) try: if os.path.exists(session_dir): shutil.rmtree(session_dir) except Exception as e: logger.error(f"Failed to remove session directory: {e}") # Remove session del self.sessions[session_id] # Update stats self.stats['active_sessions'] = len(self.sessions) self.stats['total_sessions_cleaned'] += 1 logger.info(f"Cleaned up session {session_id}") return True def _cleanup_resource(self, resource: SessionResource): """Clean up a single resource""" handler = self.cleanup_handlers.get(resource.resource_type) if handler: try: handler(resource) except Exception as e: logger.error(f"Failed to cleanup {resource.resource_type} {resource.resource_id}: {e}") def _cleanup_audio_file(self, resource: SessionResource): """Clean up audio file resource""" if resource.path and os.path.exists(resource.path): try: os.remove(resource.path) logger.debug(f"Removed audio file {resource.path}") except Exception as e: logger.error(f"Failed to remove audio file {resource.path}: {e}") def _cleanup_temp_file(self, resource: SessionResource): """Clean up temporary file resource""" if resource.path and os.path.exists(resource.path): try: os.remove(resource.path) logger.debug(f"Removed temp file {resource.path}") except Exception as e: logger.error(f"Failed to remove temp file {resource.path}: {e}") def _cleanup_websocket(self, resource: SessionResource): """Clean up websocket resource""" # Implement websocket cleanup if needed pass def _cleanup_stream(self, resource: SessionResource): """Clean up stream resource""" # Implement stream cleanup if needed if resource.metadata.get('stream_id'): # Close any open streams pass def _cleanup_oldest_resources(self, session: UserSession, count: int): """Clean up oldest resources from a session""" # Sort resources by creation time sorted_resources = sorted( session.resources.items(), key=lambda x: x[1].created_at ) # Remove oldest resources for resource_id, _ in sorted_resources[:count]: self.remove_resource(session.session_id, resource_id) def _cleanup_resources_by_size(self, session: UserSession, bytes_to_free: int): """Clean up resources to free up space""" freed_bytes = 0 # Sort resources by size (largest first) sorted_resources = sorted( session.resources.items(), key=lambda x: x[1].size_bytes, reverse=True ) # Remove resources until we've freed enough space for resource_id, resource in sorted_resources: if freed_bytes >= bytes_to_free: break freed_bytes += resource.size_bytes self.remove_resource(session.session_id, resource_id) def _cleanup_loop(self): """Background cleanup thread""" while True: try: time.sleep(self.cleanup_interval) self.cleanup_expired_sessions() self.cleanup_idle_sessions() self.cleanup_orphaned_files() except Exception as e: logger.error(f"Error in cleanup loop: {e}") def cleanup_expired_sessions(self): """Clean up sessions that have exceeded max duration""" with self.lock: now = time.time() expired_sessions = [] for session_id, session in self.sessions.items(): if now - session.created_at > self.max_session_duration: expired_sessions.append(session_id) for session_id in expired_sessions: logger.info(f"Cleaning up expired session {session_id}") self.cleanup_session(session_id) def cleanup_idle_sessions(self): """Clean up sessions that have been idle too long""" with self.lock: now = time.time() idle_sessions = [] for session_id, session in self.sessions.items(): if now - session.last_activity > self.max_idle_time: idle_sessions.append(session_id) for session_id in idle_sessions: logger.info(f"Cleaning up idle session {session_id}") self.cleanup_session(session_id) def cleanup_orphaned_files(self): """Clean up orphaned files in session storage""" try: if not os.path.exists(self.session_storage_path): return # Get all session directories session_dirs = set(os.listdir(self.session_storage_path)) # Get active session IDs with self.lock: active_sessions = set(self.sessions.keys()) # Find orphaned directories orphaned_dirs = session_dirs - active_sessions # Clean up orphaned directories for dir_name in orphaned_dirs: dir_path = os.path.join(self.session_storage_path, dir_name) if os.path.isdir(dir_path): try: shutil.rmtree(dir_path) logger.info(f"Cleaned up orphaned session directory {dir_name}") except Exception as e: logger.error(f"Failed to remove orphaned directory {dir_path}: {e}") except Exception as e: logger.error(f"Error cleaning orphaned files: {e}") def get_session_info(self, session_id: str) -> Optional[Dict[str, Any]]: """Get detailed information about a session""" with self.lock: session = self.sessions.get(session_id) if not session: return None return { 'session_id': session.session_id, 'user_id': session.user_id, 'ip_address': session.ip_address, 'created_at': datetime.fromtimestamp(session.created_at).isoformat(), 'last_activity': datetime.fromtimestamp(session.last_activity).isoformat(), 'duration_seconds': int(time.time() - session.created_at), 'idle_seconds': int(time.time() - session.last_activity), 'request_count': session.request_count, 'resource_count': len(session.resources), 'total_bytes_used': session.total_bytes_used, 'active_streams': session.active_streams, 'resources': [ { 'resource_id': r.resource_id, 'resource_type': r.resource_type, 'size_bytes': r.size_bytes, 'created_at': datetime.fromtimestamp(r.created_at).isoformat(), 'last_accessed': datetime.fromtimestamp(r.last_accessed).isoformat() } for r in session.resources.values() ] } def get_all_sessions_info(self) -> List[Dict[str, Any]]: """Get information about all active sessions""" with self.lock: return [ self.get_session_info(session_id) for session_id in self.sessions.keys() ] def get_stats(self) -> Dict[str, Any]: """Get session manager statistics""" with self.lock: return { **self.stats, 'uptime_seconds': int(time.time() - self.stats.get('start_time', time.time())), 'avg_session_duration': self._calculate_avg_session_duration(), 'avg_resources_per_session': self._calculate_avg_resources_per_session(), 'total_storage_used': self._calculate_total_storage_used() } def _calculate_avg_session_duration(self) -> float: """Calculate average session duration""" if not self.sessions: return 0 total_duration = sum( time.time() - session.created_at for session in self.sessions.values() ) return total_duration / len(self.sessions) def _calculate_avg_resources_per_session(self) -> float: """Calculate average resources per session""" if not self.sessions: return 0 total_resources = sum( len(session.resources) for session in self.sessions.values() ) return total_resources / len(self.sessions) def _calculate_total_storage_used(self) -> int: """Calculate total storage used""" total = 0 try: for root, dirs, files in os.walk(self.session_storage_path): for file in files: filepath = os.path.join(root, file) total += os.path.getsize(filepath) except Exception as e: logger.error(f"Error calculating storage used: {e}") return total def export_metrics(self) -> Dict[str, Any]: """Export metrics for monitoring""" with self.lock: return { 'sessions': { 'active': self.stats['active_sessions'], 'total_created': self.stats['total_sessions_created'], 'total_cleaned': self.stats['total_sessions_cleaned'] }, 'resources': { 'active': self.stats['active_resources'], 'total_cleaned': self.stats['total_resources_cleaned'], 'active_bytes': self.stats['active_bytes'], 'total_bytes_cleaned': self.stats['total_bytes_cleaned'] }, 'limits': { 'max_session_duration': self.max_session_duration, 'max_idle_time': self.max_idle_time, 'max_resources_per_session': self.max_resources_per_session, 'max_bytes_per_session': self.max_bytes_per_session } } # Global session manager instance _session_manager = None _session_lock = Lock() def get_session_manager(config: Dict[str, Any] = None) -> SessionManager: """Get or create global session manager instance""" global _session_manager with _session_lock: if _session_manager is None: _session_manager = SessionManager(config) return _session_manager # Flask integration def init_app(app): """Initialize session management for Flask app""" config = { 'max_session_duration': app.config.get('MAX_SESSION_DURATION', 3600), 'max_idle_time': app.config.get('MAX_SESSION_IDLE_TIME', 900), 'max_resources_per_session': app.config.get('MAX_RESOURCES_PER_SESSION', 100), 'max_bytes_per_session': app.config.get('MAX_BYTES_PER_SESSION', 100 * 1024 * 1024), 'cleanup_interval': app.config.get('SESSION_CLEANUP_INTERVAL', 60), 'session_storage_path': app.config.get('SESSION_STORAGE_PATH', os.path.join(app.config.get('UPLOAD_FOLDER', tempfile.gettempdir()), 'sessions')) } manager = get_session_manager(config) app.session_manager = manager # Add before_request handler @app.before_request def before_request_session(): # Get or create session session_id = session.get('session_id') if not session_id: session_id = str(uuid.uuid4()) session['session_id'] = session_id session.permanent = True # Get session from manager user_session = manager.get_session(session_id) if not user_session: user_session = manager.create_session( session_id=session_id, ip_address=request.remote_addr, user_agent=request.headers.get('User-Agent') ) # Update activity manager.update_session_activity(session_id) # Store in g for request access g.user_session = user_session g.session_manager = manager # Add CLI commands @app.cli.command('sessions-list') def list_sessions_cmd(): """List all active sessions""" sessions = manager.get_all_sessions_info() for session_info in sessions: print(f"\nSession: {session_info['session_id']}") print(f" Created: {session_info['created_at']}") print(f" Last activity: {session_info['last_activity']}") print(f" Resources: {session_info['resource_count']}") print(f" Bytes used: {session_info['total_bytes_used']}") @app.cli.command('sessions-cleanup') def cleanup_sessions_cmd(): """Manual session cleanup""" manager.cleanup_expired_sessions() manager.cleanup_idle_sessions() manager.cleanup_orphaned_files() print("Session cleanup completed") @app.cli.command('sessions-stats') def session_stats_cmd(): """Show session statistics""" stats = manager.get_stats() print(json.dumps(stats, indent=2)) logger.info("Session management initialized") # Decorator for session resource tracking def track_resource(resource_type: str): """Decorator to track resources for a session""" def decorator(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) # Track resource if in request context if hasattr(g, 'user_session') and hasattr(g, 'session_manager'): if isinstance(result, (str, bytes)) or hasattr(result, 'filename'): # Determine path and size path = None size = 0 if isinstance(result, str) and os.path.exists(result): path = result size = os.path.getsize(result) elif hasattr(result, 'filename'): path = result.filename if os.path.exists(path): size = os.path.getsize(path) # Add resource to session g.session_manager.add_resource( session_id=g.user_session.session_id, resource_type=resource_type, path=path, size_bytes=size ) return result return wrapper return decorator