Add health check endpoints and automatic language detection

Health Check Features (Item 12):
- Added /health endpoint for basic health monitoring
- Added /health/detailed for comprehensive component status
- Added /health/ready for Kubernetes readiness probes
- Added /health/live for liveness checks
- Frontend health monitoring with auto-recovery
- Stuck requests are cleared automatically after 60 seconds
- Visual health warnings when service is degraded
- Monitoring script for external health checks (see the sketch below)
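
A minimal sketch of what such an external monitor might look like, polling the new endpoints with the requests library. The base URL and polling interval are assumptions (app.py binds to port 5005); the endpoint paths and response fields match the diff below.

#!/usr/bin/env python3
"""Illustrative external health monitor, not the script shipped in this
commit; assumes the service is reachable at localhost:5005 (app.py's port)."""
import time

import requests

BASE_URL = "http://localhost:5005"  # assumption: local deployment
POLL_SECONDS = 30                   # hypothetical polling interval

def check_health() -> bool:
    """Return True when /health/detailed reports an overall healthy status."""
    try:
        resp = requests.get(f"{BASE_URL}/health/detailed", timeout=5)
        body = resp.json()
        if body.get("status") != "healthy":
            # Surface the failing components for quick triage
            for name, comp in body.get("components", {}).items():
                if comp.get("status") not in ("healthy", "not_available"):
                    print(f"component {name}: {comp}")
        return resp.status_code == 200 and body.get("status") == "healthy"
    except (requests.RequestException, ValueError) as exc:
        print(f"health check failed: {exc}")
        return False

if __name__ == "__main__":
    while True:
        if not check_health():
            print("service degraded or down")  # hook alerting in here
        time.sleep(POLL_SECONDS)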

Automatic Language Detection (Item 13):
- Added "Auto-detect" option in source language dropdown
- Whisper detects the language automatically when auto-detect is selected (see the client sketch after this list)
- Shows detected language in UI after transcription
- Updates language selector with detected language
- Caches transcriptions with correct detected language
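
For the auto-detect path, a hedged client-side example: the response keys ('success', 'text', 'detected_language') come straight from the handler in the diff below, while the multipart field names ('audio', 'source_lang') and the URL are assumptions about the request format.

# Sketch of a client exercising auto-detection; field names are assumed.
import requests

with open("sample.webm", "rb") as f:  # hypothetical audio file
    resp = requests.post(
        "http://localhost:5005/transcribe",  # assumption: local deployment
        files={"audio": f},
        data={"source_lang": "auto"},  # '' also triggers auto-detection
        timeout=120,
    )

body = resp.json()
if body.get("success"):
    print("text:", body["text"])
    # Present only when auto-detection ran and the code mapped to a name
    print("detected language:", body.get("detected_language"))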

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit 0c9186e57e
parent 829e8c3978
2025-06-02 22:37:38 -06:00

6 changed files with 382 additions and 6 deletions

app.py

@@ -402,9 +402,11 @@ def transcribe():
         audio_file.save(temp_path)
 
         try:
+            # Check if we should auto-detect language
+            auto_detect = source_lang == 'auto' or source_lang == ''
+
             # Use Whisper for transcription with GPU optimizations
             transcribe_options = {
-                "language": LANGUAGE_TO_CODE.get(source_lang, None),
                 "task": "transcribe",
                 "temperature": 0,  # Disable temperature sampling for faster inference
                 "best_of": 1,  # Disable beam search for faster inference
@@ -416,6 +418,10 @@ def transcribe():
             "no_speech_threshold": 0.6
         }
 
+        # Only set language if not auto-detecting
+        if not auto_detect:
+            transcribe_options["language"] = LANGUAGE_TO_CODE.get(source_lang, None)
+
         # Clear GPU cache before transcription
         if device.type == 'cuda':
             torch.cuda.empty_cache()
@@ -428,6 +434,19 @@ def transcribe():
             )
 
             transcribed_text = result["text"]
 
+            # Get detected language if auto-detection was used
+            detected_language = None
+            if auto_detect and 'language' in result:
+                # Convert language code back to full name
+                detected_code = result['language']
+                for lang_name, lang_code in LANGUAGE_TO_CODE.items():
+                    if lang_code == detected_code:
+                        detected_language = lang_name
+                        break
+
+                # Log detected language
+                logger.info(f"Auto-detected language: {detected_language} ({detected_code})")
+
         # Send notification if push is enabled
         if len(push_subscriptions) > 0:
@@ -437,10 +456,16 @@ def transcribe():
                 tag="transcription-complete"
             )
 
-        return jsonify({
+        response = {
             'success': True,
             'text': transcribed_text
-        })
+        }
+
+        # Include detected language if auto-detection was used
+        if detected_language:
+            response['detected_language'] = detected_language
+
+        return jsonify(response)
     except Exception as e:
         logger.error(f"Transcription error: {str(e)}")
         return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
@@ -598,5 +623,123 @@ def get_audio(filename):
         logger.error(f"Audio retrieval error: {str(e)}")
         return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500
 
+# Health check endpoints for monitoring
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Basic health check endpoint"""
+    return jsonify({
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'service': 'voice-translator'
+    })
+
+@app.route('/health/detailed', methods=['GET'])
+def detailed_health_check():
+    """Detailed health check with component status"""
+    health_status = {
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'components': {
+            'whisper': {'status': 'unknown'},
+            'ollama': {'status': 'unknown'},
+            'tts': {'status': 'unknown'},
+            'gpu': {'status': 'unknown'}
+        },
+        'metrics': {}
+    }
+
+    # Check Whisper model
+    try:
+        if whisper_model is not None:
+            health_status['components']['whisper']['status'] = 'healthy'
+            health_status['components']['whisper']['model_size'] = MODEL_SIZE
+        else:
+            health_status['components']['whisper']['status'] = 'unhealthy'
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['whisper']['status'] = 'unhealthy'
+        health_status['components']['whisper']['error'] = str(e)
+        health_status['status'] = 'unhealthy'
+
+    # Check GPU availability
+    try:
+        if torch.cuda.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = torch.cuda.get_device_name(0)
+            health_status['components']['gpu']['memory_allocated'] = f"{torch.cuda.memory_allocated(0) / 1024**2:.2f} MB"
+            health_status['components']['gpu']['memory_reserved'] = f"{torch.cuda.memory_reserved(0) / 1024**2:.2f} MB"
+        elif torch.backends.mps.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = 'Apple Silicon GPU'
+        else:
+            health_status['components']['gpu']['status'] = 'not_available'
+            health_status['components']['gpu']['device'] = 'CPU'
+    except Exception as e:
+        health_status['components']['gpu']['status'] = 'error'
+        health_status['components']['gpu']['error'] = str(e)
+
+    # Check Ollama connection
+    try:
+        ollama_models = ollama.list()
+        health_status['components']['ollama']['status'] = 'healthy'
+        health_status['components']['ollama']['available_models'] = len(ollama_models.get('models', []))
+    except Exception as e:
+        health_status['components']['ollama']['status'] = 'unhealthy'
+        health_status['components']['ollama']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Check TTS server
+    try:
+        tts_response = requests.get(app.config['TTS_SERVER'].replace('/v1/audio/speech', '/health'), timeout=5)
+        if tts_response.status_code == 200:
+            health_status['components']['tts']['status'] = 'healthy'
+            health_status['components']['tts']['server_url'] = app.config['TTS_SERVER']
+        else:
+            health_status['components']['tts']['status'] = 'unhealthy'
+            health_status['components']['tts']['http_status'] = tts_response.status_code
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['tts']['status'] = 'unhealthy'
+        health_status['components']['tts']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Add system metrics
+    health_status['metrics']['uptime'] = time.time() - app.start_time if hasattr(app, 'start_time') else 0
+    health_status['metrics']['request_count'] = getattr(app, 'request_count', 0)
+    # Return 503 only when unhealthy; a degraded service still returns 200
+    http_status = 503 if health_status['status'] == 'unhealthy' else 200
+
+    return jsonify(health_status), http_status
+
+@app.route('/health/ready', methods=['GET'])
+def readiness_check():
+    """Readiness probe - checks if service is ready to accept traffic"""
+    try:
+        # Check if all critical components are loaded
+        if whisper_model is None:
+            return jsonify({'status': 'not_ready', 'reason': 'Whisper model not loaded'}), 503
+
+        # Check Ollama connection
+        ollama.list()
+
+        return jsonify({'status': 'ready', 'timestamp': time.time()})
+    except Exception as e:
+        return jsonify({'status': 'not_ready', 'reason': str(e)}), 503
+
+@app.route('/health/live', methods=['GET'])
+def liveness_check():
+    """Liveness probe - basic check to see if process is alive"""
+    return jsonify({'status': 'alive', 'timestamp': time.time()})
+
+# Initialize app start time for metrics
+app.start_time = time.time()
+app.request_count = 0
+
+# Middleware to count requests
+@app.before_request
+def before_request():
+    app.request_count = getattr(app, 'request_count', 0) + 1
+
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5005, debug=True)
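
As a usage note, /health/ready maps naturally onto a Kubernetes readinessProbe and /health/live onto a livenessProbe. A small smoke test against a running instance (assuming localhost:5005, as in app.run()) shows how the four endpoints respond:

# Probe-endpoint smoke test; assumes the service runs on localhost:5005.
import requests

BASE = "http://localhost:5005"

for path in ("/health", "/health/live", "/health/ready", "/health/detailed"):
    resp = requests.get(f"{BASE}{path}", timeout=5)
    print(f"{path}: HTTP {resp.status_code}, status={resp.json().get('status')}")

# Expected: /health and /health/live always answer 200; /health/ready answers
# 503 until the Whisper model is loaded and Ollama responds; /health/detailed
# answers 503 only when overall status is 'unhealthy' (degraded still gets 200).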