"""Whisper Model Configuration and Optimization Settings.

Plain module-level constants consumed by the transcription code:

* ``MODEL_SIZE`` - which Whisper checkpoint to load.
* ``GPU_OPTIMIZATIONS`` - boolean switches for GPU-side tuning.
* ``TRANSCRIBE_OPTIONS`` - decoding options tuned for speed.
* ``MEMORY_SETTINGS`` - post-transcription cleanup and concurrency limits.
* ``ENABLE_PERFORMANCE_LOGGING`` - toggles performance monitoring output.
"""

# Model selection based on available resources.
# Available models: tiny, base, small, medium, large
MODEL_SIZE = "base"  # ~140MB, good balance of speed and accuracy

# GPU Optimization Settings
GPU_OPTIMIZATIONS = {
    "enable_tf32": True,  # TensorFloat-32 for Ampere GPUs
    "enable_cudnn_benchmark": True,  # Auto-tune convolution algorithms
    "use_fp16": True,  # Half precision for faster inference
    "pre_allocate_memory": True,  # Reduce memory fragmentation
    "warm_up_gpu": True,  # Cache CUDA kernels on startup
}

# Transcription Settings for Speed
TRANSCRIBE_OPTIONS = {
    "task": "transcribe",
    "temperature": 0,  # Disable sampling (greedy decoding)
    # NOTE(review): best_of is the number of candidates drawn when sampling
    # (temperature > 0); it is not the beam-search switch - confirm against
    # the Whisper version in use.
    "best_of": 1,  # Single candidate
    "beam_size": 1,  # Single beam
    "condition_on_previous_text": False,  # Faster inference
    "compression_ratio_threshold": 2.4,
    "logprob_threshold": -1.0,
    "no_speech_threshold": 0.6,
    "word_timestamps": False,  # Disable if not needed
}

# Memory Management
MEMORY_SETTINGS = {
    "clear_cache_after_transcribe": True,
    "force_garbage_collection": True,
    "max_concurrent_transcriptions": 1,  # Prevent memory overflow
}

# Performance Monitoring
ENABLE_PERFORMANCE_LOGGING = True