Add health check endpoints and automatic language detection
Health Check Features (Item 12):
- Added /health endpoint for basic health monitoring
- Added /health/detailed for comprehensive component status
- Added /health/ready for Kubernetes readiness probes
- Added /health/live for liveness checks
- Frontend health monitoring with auto-recovery
- Clear stuck requests after 60 seconds
- Visual health warnings when the service is degraded
- Monitoring script for external health checks (sketch below)

Automatic Language Detection (Item 13):
- Added "Auto-detect" option in the source language dropdown
- Whisper automatically detects the language when auto-detect is selected
- Shows the detected language in the UI after transcription
- Updates the language selector with the detected language
- Caches transcriptions with the correct detected language

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
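The monitoring script mentioned above is not included in this diff, so the following is only a rough sketch of what an external poller against the new /health endpoint might look like; the URL, poll interval, and alert hook are assumptions, not code from this commit (only the port comes from the app.run() call below).

#!/usr/bin/env python3
"""External health poller for the voice-translator service (illustrative sketch)."""
import time
import requests

HEALTH_URL = "http://localhost:5005/health"  # port from app.run(); host is assumed
POLL_INTERVAL_SECONDS = 30                   # arbitrary choice for the sketch

def check_once() -> bool:
    """Return True if the service reports healthy, False otherwise."""
    try:
        resp = requests.get(HEALTH_URL, timeout=5)
        resp.raise_for_status()
        return resp.json().get("status") == "healthy"
    except requests.RequestException as exc:
        print(f"health check failed: {exc}")
        return False

if __name__ == "__main__":
    while True:
        if not check_once():
            # Hook real alerting (email, pager, etc.) in here.
            print("voice-translator is unhealthy or unreachable")
        time.sleep(POLL_INTERVAL_SECONDS)

Run it as a cron job or a long-lived process next to the service; anything beyond printing is left to whatever alerting is actually in use.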
app.py (149 changed lines)
@@ -402,9 +402,11 @@ def transcribe():
     audio_file.save(temp_path)

     try:
+        # Check if we should auto-detect language
+        auto_detect = source_lang == 'auto' or source_lang == ''
+
         # Use Whisper for transcription with GPU optimizations
         transcribe_options = {
-            "language": LANGUAGE_TO_CODE.get(source_lang, None),
             "task": "transcribe",
             "temperature": 0,  # Disable temperature sampling for faster inference
             "best_of": 1,  # Disable beam search for faster inference
@@ -416,6 +418,10 @@ def transcribe():
             "no_speech_threshold": 0.6
         }

+        # Only set language if not auto-detecting
+        if not auto_detect:
+            transcribe_options["language"] = LANGUAGE_TO_CODE.get(source_lang, None)
+
         # Clear GPU cache before transcription
         if device.type == 'cuda':
             torch.cuda.empty_cache()
@@ -428,6 +434,19 @@ def transcribe():
         )

         transcribed_text = result["text"]

+        # Get detected language if auto-detection was used
+        detected_language = None
+        if auto_detect and 'language' in result:
+            # Convert language code back to full name
+            detected_code = result['language']
+            for lang_name, lang_code in LANGUAGE_TO_CODE.items():
+                if lang_code == detected_code:
+                    detected_language = lang_name
+                    break
+
+            # Log detected language
+            logger.info(f"Auto-detected language: {detected_language} ({detected_code})")
+
         # Send notification if push is enabled
         if len(push_subscriptions) > 0:
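A note on the reverse lookup added above: the linear scan over LANGUAGE_TO_CODE works, but since the mapping is static it can be inverted once at import time for a constant-time lookup. A minimal sketch, assuming LANGUAGE_TO_CODE maps full language names to Whisper's short codes (a suggested refactor, not part of this commit):

# Build the inverse map once at module load (assumes the codes are unique).
CODE_TO_LANGUAGE = {code: name for name, code in LANGUAGE_TO_CODE.items()}

# The per-request lookup then collapses to a single dict access:
detected_language = CODE_TO_LANGUAGE.get(result['language'])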
@@ -437,10 +456,16 @@ def transcribe():
                 tag="transcription-complete"
             )

-        return jsonify({
+        response = {
             'success': True,
             'text': transcribed_text
-        })
+        }
+
+        # Include detected language if auto-detection was used
+        if detected_language:
+            response['detected_language'] = detected_language
+
+        return jsonify(response)
     except Exception as e:
         logger.error(f"Transcription error: {str(e)}")
         return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
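The route decorator for transcribe() sits outside this diff, so the endpoint path below is a guess; still, given the response shape assembled above, a client consuming the new field might look roughly like this (the /transcribe URL and form field names are illustrative assumptions; only the 'success', 'text', and 'detected_language' keys come from the diff):

import requests

# Hypothetical client call against the transcription endpoint.
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:5005/transcribe",   # assumed path
        files={"audio": f},                   # assumed field name
        data={"source_lang": "auto"},         # triggers auto_detect in the handler
    )
data = resp.json()
if data.get("success"):
    print(data["text"])
    # 'detected_language' is only present when auto-detection was used
    if "detected_language" in data:
        print(f"Detected language: {data['detected_language']}")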
@@ -598,5 +623,123 @@ def get_audio(filename):
         logger.error(f"Audio retrieval error: {str(e)}")
         return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500

+# Health check endpoints for monitoring
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Basic health check endpoint"""
+    return jsonify({
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'service': 'voice-translator'
+    })
+
+@app.route('/health/detailed', methods=['GET'])
+def detailed_health_check():
+    """Detailed health check with component status"""
+    health_status = {
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'components': {
+            'whisper': {'status': 'unknown'},
+            'ollama': {'status': 'unknown'},
+            'tts': {'status': 'unknown'},
+            'gpu': {'status': 'unknown'}
+        },
+        'metrics': {}
+    }
+
+    # Check Whisper model
+    try:
+        if whisper_model is not None:
+            health_status['components']['whisper']['status'] = 'healthy'
+            health_status['components']['whisper']['model_size'] = MODEL_SIZE
+        else:
+            health_status['components']['whisper']['status'] = 'unhealthy'
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['whisper']['status'] = 'unhealthy'
+        health_status['components']['whisper']['error'] = str(e)
+        health_status['status'] = 'unhealthy'
+
+    # Check GPU availability
+    try:
+        if torch.cuda.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = torch.cuda.get_device_name(0)
+            health_status['components']['gpu']['memory_allocated'] = f"{torch.cuda.memory_allocated(0) / 1024**2:.2f} MB"
+            health_status['components']['gpu']['memory_reserved'] = f"{torch.cuda.memory_reserved(0) / 1024**2:.2f} MB"
+        elif torch.backends.mps.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = 'Apple Silicon GPU'
+        else:
+            health_status['components']['gpu']['status'] = 'not_available'
+            health_status['components']['gpu']['device'] = 'CPU'
+    except Exception as e:
+        health_status['components']['gpu']['status'] = 'error'
+        health_status['components']['gpu']['error'] = str(e)
+
+    # Check Ollama connection
+    try:
+        ollama_models = ollama.list()
+        health_status['components']['ollama']['status'] = 'healthy'
+        health_status['components']['ollama']['available_models'] = len(ollama_models.get('models', []))
+    except Exception as e:
+        health_status['components']['ollama']['status'] = 'unhealthy'
+        health_status['components']['ollama']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Check TTS server
+    try:
+        tts_response = requests.get(app.config['TTS_SERVER'].replace('/v1/audio/speech', '/health'), timeout=5)
+        if tts_response.status_code == 200:
+            health_status['components']['tts']['status'] = 'healthy'
+            health_status['components']['tts']['server_url'] = app.config['TTS_SERVER']
+        else:
+            health_status['components']['tts']['status'] = 'unhealthy'
+            health_status['components']['tts']['http_status'] = tts_response.status_code
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['tts']['status'] = 'unhealthy'
+        health_status['components']['tts']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Add system metrics
+    health_status['metrics']['uptime'] = time.time() - app.start_time if hasattr(app, 'start_time') else 0
+    health_status['metrics']['request_count'] = getattr(app, 'request_count', 0)
+
+    # Only an overall 'unhealthy' status maps to 503; 'degraded' still returns 200
+    http_status = 503 if health_status['status'] == 'unhealthy' else 200
+
+    return jsonify(health_status), http_status
+
+@app.route('/health/ready', methods=['GET'])
+def readiness_check():
+    """Readiness probe - checks if service is ready to accept traffic"""
+    try:
+        # Check if all critical components are loaded
+        if whisper_model is None:
+            return jsonify({'status': 'not_ready', 'reason': 'Whisper model not loaded'}), 503
+
+        # Check Ollama connection
+        ollama.list()
+
+        return jsonify({'status': 'ready', 'timestamp': time.time()})
+    except Exception as e:
+        return jsonify({'status': 'not_ready', 'reason': str(e)}), 503
+
+@app.route('/health/live', methods=['GET'])
+def liveness_check():
+    """Liveness probe - basic check to see if process is alive"""
+    return jsonify({'status': 'alive', 'timestamp': time.time()})
+
+# Initialize app start time for metrics
+app.start_time = time.time()
+app.request_count = 0
+
+# Middleware to count requests
+@app.before_request
+def before_request():
+    app.request_count = getattr(app, 'request_count', 0) + 1
+
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5005, debug=True)
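For completeness, a quick usage sketch against the new probes, assuming the service is running locally on the port from app.run() above (the printed shapes reflect the responses defined in this diff, not a guaranteed contract):

import requests

BASE = "http://localhost:5005"  # host assumed; port taken from app.run()

# Liveness: should answer 200 while the process is up
live = requests.get(f"{BASE}/health/live", timeout=5)
print(live.json())  # e.g. {'status': 'alive', 'timestamp': ...}

# Readiness: 200 when Whisper and Ollama are up, 503 otherwise
ready = requests.get(f"{BASE}/health/ready", timeout=5)
print(ready.status_code, ready.json())

# Detailed: per-component status plus uptime/request-count metrics
detailed = requests.get(f"{BASE}/health/detailed", timeout=10)
for name, comp in detailed.json()["components"].items():
    print(f"{name}: {comp['status']}")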