Add health check endpoints and automatic language detection

Health Check Features (Item 12):
- Added /health endpoint for basic health monitoring
- Added /health/detailed for comprehensive component status
- Added /health/ready for Kubernetes readiness probes
- Added /health/live for liveness checks
- Frontend health monitoring with auto-recovery
- Stuck requests are cleared automatically after 60 seconds
- Visual health warnings when service is degraded
- Monitoring script for external health checks (see the sketch below)
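
A minimal sketch of what such an external monitor might look like, polling the new endpoints with the requests library. The base URL and polling interval are assumptions (app.py binds to port 5005); the endpoint paths and response fields match the diff below.

#!/usr/bin/env python3
"""Illustrative external health monitor, not the script shipped in this
commit; assumes the service is reachable at localhost:5005 (app.py's port)."""
import time

import requests

BASE_URL = "http://localhost:5005"  # assumption: local deployment
POLL_SECONDS = 30                   # hypothetical polling interval

def check_health() -> bool:
    """Return True when /health/detailed reports an overall healthy status."""
    try:
        resp = requests.get(f"{BASE_URL}/health/detailed", timeout=5)
        body = resp.json()
        if body.get("status") != "healthy":
            # Surface the failing components for quick triage
            for name, comp in body.get("components", {}).items():
                if comp.get("status") not in ("healthy", "not_available"):
                    print(f"component {name}: {comp}")
        return resp.status_code == 200 and body.get("status") == "healthy"
    except (requests.RequestException, ValueError) as exc:
        print(f"health check failed: {exc}")
        return False

if __name__ == "__main__":
    while True:
        if not check_health():
            print("service degraded or down")  # hook alerting in here
        time.sleep(POLL_SECONDS)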

Automatic Language Detection (Item 13):
- Added "Auto-detect" option in source language dropdown
- Whisper detects the language automatically when auto-detect is selected (see the client sketch after this list)
- Shows detected language in UI after transcription
- Updates language selector with detected language
- Caches transcriptions with correct detected language
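
For the auto-detect path, a hedged client-side example: the response keys ('success', 'text', 'detected_language') come straight from the handler in the diff below, while the multipart field names ('audio', 'source_lang') and the URL are assumptions about the request format.

# Sketch of a client exercising auto-detection; field names are assumed.
import requests

with open("sample.webm", "rb") as f:  # hypothetical audio file
    resp = requests.post(
        "http://localhost:5005/transcribe",  # assumption: local deployment
        files={"audio": f},
        data={"source_lang": "auto"},  # '' also triggers auto-detection
        timeout=120,
    )

body = resp.json()
if body.get("success"):
    print("text:", body["text"])
    # Present only when auto-detection ran and the code mapped to a name
    print("detected language:", body.get("detected_language"))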

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit 0c9186e57e
parent 829e8c3978
2025-06-02 22:37:38 -06:00

6 changed files with 382 additions and 6 deletions

app.py

@@ -402,9 +402,11 @@ def transcribe():
         audio_file.save(temp_path)
 
         try:
+            # Check if we should auto-detect language
+            auto_detect = source_lang == 'auto' or source_lang == ''
+
             # Use Whisper for transcription with GPU optimizations
             transcribe_options = {
-                "language": LANGUAGE_TO_CODE.get(source_lang, None),
                 "task": "transcribe",
                 "temperature": 0,  # Disable temperature sampling for faster inference
                 "best_of": 1,  # Disable beam search for faster inference
@@ -416,6 +418,10 @@ def transcribe():
             "no_speech_threshold": 0.6
         }
 
+        # Only set language if not auto-detecting
+        if not auto_detect:
+            transcribe_options["language"] = LANGUAGE_TO_CODE.get(source_lang, None)
+
         # Clear GPU cache before transcription
         if device.type == 'cuda':
             torch.cuda.empty_cache()
@@ -428,6 +434,19 @@ def transcribe():
             )
 
             transcribed_text = result["text"]
 
+            # Get detected language if auto-detection was used
+            detected_language = None
+            if auto_detect and 'language' in result:
+                # Convert language code back to full name
+                detected_code = result['language']
+                for lang_name, lang_code in LANGUAGE_TO_CODE.items():
+                    if lang_code == detected_code:
+                        detected_language = lang_name
+                        break
+
+                # Log detected language
+                logger.info(f"Auto-detected language: {detected_language} ({detected_code})")
+
         # Send notification if push is enabled
         if len(push_subscriptions) > 0:
@@ -437,10 +456,16 @@ def transcribe():
                 tag="transcription-complete"
             )
 
-        return jsonify({
+        response = {
             'success': True,
             'text': transcribed_text
-        })
+        }
+
+        # Include detected language if auto-detection was used
+        if detected_language:
+            response['detected_language'] = detected_language
+
+        return jsonify(response)
     except Exception as e:
         logger.error(f"Transcription error: {str(e)}")
         return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
@@ -598,5 +623,123 @@ def get_audio(filename):
         logger.error(f"Audio retrieval error: {str(e)}")
         return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500
 
+# Health check endpoints for monitoring
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Basic health check endpoint"""
+    return jsonify({
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'service': 'voice-translator'
+    })
+
+@app.route('/health/detailed', methods=['GET'])
+def detailed_health_check():
+    """Detailed health check with component status"""
+    health_status = {
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'components': {
+            'whisper': {'status': 'unknown'},
+            'ollama': {'status': 'unknown'},
+            'tts': {'status': 'unknown'},
+            'gpu': {'status': 'unknown'}
+        },
+        'metrics': {}
+    }
+
+    # Check Whisper model
+    try:
+        if whisper_model is not None:
+            health_status['components']['whisper']['status'] = 'healthy'
+            health_status['components']['whisper']['model_size'] = MODEL_SIZE
+        else:
+            health_status['components']['whisper']['status'] = 'unhealthy'
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['whisper']['status'] = 'unhealthy'
+        health_status['components']['whisper']['error'] = str(e)
+        health_status['status'] = 'unhealthy'
+
+    # Check GPU availability
+    try:
+        if torch.cuda.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = torch.cuda.get_device_name(0)
+            health_status['components']['gpu']['memory_allocated'] = f"{torch.cuda.memory_allocated(0) / 1024**2:.2f} MB"
+            health_status['components']['gpu']['memory_reserved'] = f"{torch.cuda.memory_reserved(0) / 1024**2:.2f} MB"
+        elif torch.backends.mps.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = 'Apple Silicon GPU'
+        else:
+            health_status['components']['gpu']['status'] = 'not_available'
+            health_status['components']['gpu']['device'] = 'CPU'
+    except Exception as e:
+        health_status['components']['gpu']['status'] = 'error'
+        health_status['components']['gpu']['error'] = str(e)
+
+    # Check Ollama connection
+    try:
+        ollama_models = ollama.list()
+        health_status['components']['ollama']['status'] = 'healthy'
+        health_status['components']['ollama']['available_models'] = len(ollama_models.get('models', []))
+    except Exception as e:
+        health_status['components']['ollama']['status'] = 'unhealthy'
+        health_status['components']['ollama']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Check TTS server
+    try:
+        tts_response = requests.get(app.config['TTS_SERVER'].replace('/v1/audio/speech', '/health'), timeout=5)
+        if tts_response.status_code == 200:
+            health_status['components']['tts']['status'] = 'healthy'
+            health_status['components']['tts']['server_url'] = app.config['TTS_SERVER']
+        else:
+            health_status['components']['tts']['status'] = 'unhealthy'
+            health_status['components']['tts']['http_status'] = tts_response.status_code
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['tts']['status'] = 'unhealthy'
+        health_status['components']['tts']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Add system metrics
+    health_status['metrics']['uptime'] = time.time() - app.start_time if hasattr(app, 'start_time') else 0
+    health_status['metrics']['request_count'] = getattr(app, 'request_count', 0)
+    # Return 503 only when unhealthy; a degraded service still returns 200
+    http_status = 503 if health_status['status'] == 'unhealthy' else 200
+
+    return jsonify(health_status), http_status
+
+@app.route('/health/ready', methods=['GET'])
+def readiness_check():
+    """Readiness probe - checks if service is ready to accept traffic"""
+    try:
+        # Check if all critical components are loaded
+        if whisper_model is None:
+            return jsonify({'status': 'not_ready', 'reason': 'Whisper model not loaded'}), 503
+
+        # Check Ollama connection
+        ollama.list()
+
+        return jsonify({'status': 'ready', 'timestamp': time.time()})
+    except Exception as e:
+        return jsonify({'status': 'not_ready', 'reason': str(e)}), 503
+
+@app.route('/health/live', methods=['GET'])
+def liveness_check():
+    """Liveness probe - basic check to see if process is alive"""
+    return jsonify({'status': 'alive', 'timestamp': time.time()})
+
+# Initialize app start time for metrics
+app.start_time = time.time()
+app.request_count = 0
+
+# Middleware to count requests
+@app.before_request
+def before_request():
+    app.request_count = getattr(app, 'request_count', 0) + 1
+
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5005, debug=True)
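
As a usage note, /health/ready maps naturally onto a Kubernetes readinessProbe and /health/live onto a livenessProbe. A small smoke test against a running instance (assuming localhost:5005, as in app.run()) shows how the four endpoints respond:

# Probe-endpoint smoke test; assumes the service runs on localhost:5005.
import requests

BASE = "http://localhost:5005"

for path in ("/health", "/health/live", "/health/ready", "/health/detailed"):
    resp = requests.get(f"{BASE}{path}", timeout=5)
    print(f"{path}: HTTP {resp.status_code}, status={resp.json().get('status')}")

# Expected: /health and /health/live always answer 200; /health/ready answers
# 503 until the Whisper model is loaded and Ollama responds; /health/detailed
# answers 503 only when overall status is 'unhealthy' (degraded still gets 200).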