talk2me/whisper_config.py
Adolfo Delorenzo 05ad940079 Major improvements: TypeScript, animations, notifications, compression, GPU optimization
- Added TypeScript support with type definitions and build process
- Implemented loading animations and visual feedback
- Added push notifications with user preferences
- Implemented audio compression (50-70% bandwidth reduction)
- Added GPU optimization for Whisper (2-3x faster transcription)
- Support for NVIDIA, AMD (ROCm), and Apple Silicon GPUs
- Removed duplicate JavaScript code (15KB reduction)
- Enhanced .gitignore for Node.js and VAPID keys
- Created documentation for TypeScript and GPU support

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-02 21:18:16 -06:00


"""
Whisper Model Configuration and Optimization Settings
"""
# Model selection based on available resources
# Available models: tiny, base, small, medium, large
MODEL_SIZE = "base" # ~140MB, good balance of speed and accuracy
# GPU Optimization Settings
GPU_OPTIMIZATIONS = {
    "enable_tf32": True,             # TensorFloat-32 on Ampere and newer GPUs
    "enable_cudnn_benchmark": True,  # Auto-tune convolution algorithms
    "use_fp16": True,                # Half precision for faster inference
    "pre_allocate_memory": True,     # Reduce memory fragmentation
    "warm_up_gpu": True,             # Cache CUDA kernels on startup
}
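A minimal sketch of how these flags could be applied at startup with PyTorch. The `apply_gpu_optimizations` helper and its guarded import are illustrative, not part of the original module:

```python
def apply_gpu_optimizations(settings):
    """Apply the GPU flags above to a PyTorch runtime, if one is present."""
    try:
        import torch
    except ImportError:
        return False  # PyTorch not installed; nothing to configure
    if settings.get("enable_tf32"):
        # TF32 speeds up matmul/conv on Ampere+ GPUs with minimal accuracy loss
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    if settings.get("enable_cudnn_benchmark"):
        # Let cuDNN auto-tune convolution algorithms for fixed input shapes
        torch.backends.cudnn.benchmark = True
    return True
```

`use_fp16` would typically be applied separately (e.g. `model.half()` after loading), and warm-up means running one dummy inference at startup so CUDA kernels are compiled before the first real request.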
# Transcription Settings for Speed
TRANSCRIBE_OPTIONS = {
    "task": "transcribe",
    "temperature": 0,                     # Greedy decoding (no sampling)
    "best_of": 1,                         # Single sampling candidate
    "beam_size": 1,                       # No beam search
    "condition_on_previous_text": False,  # Faster; no context carried between segments
    "compression_ratio_threshold": 2.4,   # Reject repetitive/gibberish output
    "logprob_threshold": -1.0,            # Reject low-confidence segments
    "no_speech_threshold": 0.6,           # Silence-detection sensitivity
    "word_timestamps": False,             # Disable if per-word timing is not needed
}
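Usage sketch: these options are passed as keyword arguments to Whisper's `transcribe` call. The `transcribe_fast` wrapper and the audio path are illustrative, not part of the original module:

```python
def transcribe_fast(model, audio_path, options=None):
    """Run transcription with the speed-oriented options above."""
    opts = options or {}
    # openai-whisper's model.transcribe forwards these keys to the decoder
    return model.transcribe(audio_path, **opts)
```

Setting `beam_size` and `best_of` to 1 trades a small amount of accuracy for noticeably lower latency, which suits interactive use.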
# Memory Management
MEMORY_SETTINGS = {
    "clear_cache_after_transcribe": True,
    "force_garbage_collection": True,
    "max_concurrent_transcriptions": 1,  # Prevent memory overflow
}
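A sketch of the cleanup these settings imply, run after each transcription. The helper name is illustrative, and the torch import is optional so the code also works on CPU-only deployments:

```python
import gc

def release_memory(settings):
    """Free Python and (if available) CUDA memory after a transcription."""
    if settings.get("force_garbage_collection"):
        gc.collect()  # Reclaim Python-level objects immediately
    if settings.get("clear_cache_after_transcribe"):
        try:
            import torch
            if torch.cuda.is_available():
                torch.cuda.empty_cache()  # Return cached CUDA blocks to the driver
        except ImportError:
            pass  # CPU-only deployment; nothing to clear
    return True
```

`max_concurrent_transcriptions` would typically be enforced separately, e.g. by acquiring a `threading.Semaphore(1)` around each transcription call.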
# Performance Monitoring
ENABLE_PERFORMANCE_LOGGING = True