Add health check endpoints and automatic language detection

Health Check Features (Item 12):
- Added /health endpoint for basic health monitoring
- Added /health/detailed for comprehensive component status
- Added /health/ready for Kubernetes readiness probes
- Added /health/live for liveness checks
- Frontend health monitoring with auto-recovery
- Clear stuck requests after 60 seconds
- Visual health warnings when service is degraded
- Monitoring script for external health checks

Automatic Language Detection (Item 13):
- Added "Auto-detect" option in source language dropdown
- Whisper automatically detects language when auto-detect is selected
- Shows detected language in UI after transcription
- Updates language selector with detected language
- Caches transcriptions with correct detected language

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Adolfo Delorenzo 2025-06-02 22:37:38 -06:00
parent 829e8c3978
commit 0c9186e57e
6 changed files with 382 additions and 6 deletions

149
app.py
View File

@ -402,9 +402,11 @@ def transcribe():
audio_file.save(temp_path)
try:
# Check if we should auto-detect language
auto_detect = source_lang == 'auto' or source_lang == ''
# Use Whisper for transcription with GPU optimizations
transcribe_options = {
"language": LANGUAGE_TO_CODE.get(source_lang, None),
"task": "transcribe",
"temperature": 0, # Disable temperature sampling for faster inference
"best_of": 1, # Disable beam search for faster inference
@ -416,6 +418,10 @@ def transcribe():
"no_speech_threshold": 0.6
}
# Only set language if not auto-detecting
if not auto_detect:
transcribe_options["language"] = LANGUAGE_TO_CODE.get(source_lang, None)
# Clear GPU cache before transcription
if device.type == 'cuda':
torch.cuda.empty_cache()
@ -429,6 +435,19 @@ def transcribe():
transcribed_text = result["text"]
# Get detected language if auto-detection was used
detected_language = None
if auto_detect and 'language' in result:
# Convert language code back to full name
detected_code = result['language']
for lang_name, lang_code in LANGUAGE_TO_CODE.items():
if lang_code == detected_code:
detected_language = lang_name
break
# Log detected language
logger.info(f"Auto-detected language: {detected_language} ({detected_code})")
# Send notification if push is enabled
if len(push_subscriptions) > 0:
send_push_notification(
@ -437,10 +456,16 @@ def transcribe():
tag="transcription-complete"
)
return jsonify({
response = {
'success': True,
'text': transcribed_text
})
}
# Include detected language if auto-detection was used
if detected_language:
response['detected_language'] = detected_language
return jsonify(response)
except Exception as e:
logger.error(f"Transcription error: {str(e)}")
return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
@ -598,5 +623,123 @@ def get_audio(filename):
logger.error(f"Audio retrieval error: {str(e)}")
return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500
# Health check endpoints for monitoring
@app.route('/health', methods=['GET'])
def health_check():
    """Lightweight health probe: always reports the service as up.

    Used by the frontend poller and the external monitor script; deeper
    component checks live under /health/detailed.
    """
    payload = {
        'status': 'healthy',
        'timestamp': time.time(),
        'service': 'voice-translator'
    }
    return jsonify(payload)
@app.route('/health/detailed', methods=['GET'])
def detailed_health_check():
    """Detailed health check with per-component status.

    Probes each dependency (Whisper model, GPU, Ollama, TTS server) and
    aggregates an overall status: 'healthy', 'degraded' (a non-critical
    component is down), or 'unhealthy' (critical failure).  Only a hard
    'unhealthy' result maps to HTTP 503; 'degraded' still serves 200 so
    load balancers keep routing while monitoring raises alerts.
    """
    health_status = {
        'status': 'healthy',
        'timestamp': time.time(),
        'components': {
            'whisper': {'status': 'unknown'},
            'ollama': {'status': 'unknown'},
            'tts': {'status': 'unknown'},
            'gpu': {'status': 'unknown'}
        },
        'metrics': {}
    }

    # Check Whisper model -- transcription is impossible without it
    try:
        if whisper_model is not None:
            health_status['components']['whisper']['status'] = 'healthy'
            health_status['components']['whisper']['model_size'] = MODEL_SIZE
        else:
            health_status['components']['whisper']['status'] = 'unhealthy'
            health_status['status'] = 'degraded'
    except Exception as e:
        health_status['components']['whisper']['status'] = 'unhealthy'
        health_status['components']['whisper']['error'] = str(e)
        health_status['status'] = 'unhealthy'

    # Check GPU availability (CUDA first, then Apple Silicon MPS).
    # CPU-only is reported as 'not_available' but does not degrade
    # the overall status -- the service still works, just slower.
    try:
        if torch.cuda.is_available():
            health_status['components']['gpu']['status'] = 'healthy'
            health_status['components']['gpu']['device'] = torch.cuda.get_device_name(0)
            health_status['components']['gpu']['memory_allocated'] = f"{torch.cuda.memory_allocated(0) / 1024**2:.2f} MB"
            health_status['components']['gpu']['memory_reserved'] = f"{torch.cuda.memory_reserved(0) / 1024**2:.2f} MB"
        elif torch.backends.mps.is_available():
            health_status['components']['gpu']['status'] = 'healthy'
            health_status['components']['gpu']['device'] = 'Apple Silicon GPU'
        else:
            health_status['components']['gpu']['status'] = 'not_available'
            health_status['components']['gpu']['device'] = 'CPU'
    except Exception as e:
        health_status['components']['gpu']['status'] = 'error'
        health_status['components']['gpu']['error'] = str(e)

    # Check Ollama connection (translation backend)
    try:
        ollama_models = ollama.list()
        health_status['components']['ollama']['status'] = 'healthy'
        health_status['components']['ollama']['available_models'] = len(ollama_models.get('models', []))
    except Exception as e:
        health_status['components']['ollama']['status'] = 'unhealthy'
        health_status['components']['ollama']['error'] = str(e)
        health_status['status'] = 'degraded'

    # Check TTS server by hitting its /health endpoint (derived from the
    # configured speech-API URL) rather than issuing a real synthesis call.
    try:
        tts_response = requests.get(app.config['TTS_SERVER'].replace('/v1/audio/speech', '/health'), timeout=5)
        if tts_response.status_code == 200:
            health_status['components']['tts']['status'] = 'healthy'
            health_status['components']['tts']['server_url'] = app.config['TTS_SERVER']
        else:
            health_status['components']['tts']['status'] = 'unhealthy'
            health_status['components']['tts']['http_status'] = tts_response.status_code
            health_status['status'] = 'degraded'
    except Exception as e:
        health_status['components']['tts']['status'] = 'unhealthy'
        health_status['components']['tts']['error'] = str(e)
        health_status['status'] = 'degraded'

    # System metrics; start_time/request_count are attached at module init,
    # so fall back to safe defaults if this handler runs before that.
    health_status['metrics']['uptime'] = time.time() - app.start_time if hasattr(app, 'start_time') else 0
    health_status['metrics']['request_count'] = getattr(app, 'request_count', 0)

    # Simplified from a redundant double-ternary that mapped both the
    # 'healthy' and 'degraded' branches to 200 anyway.
    http_status = 503 if health_status['status'] == 'unhealthy' else 200
    return jsonify(health_status), http_status
@app.route('/health/ready', methods=['GET'])
def readiness_check():
    """Readiness probe - checks if service is ready to accept traffic.

    Returns 503 with a reason until every critical dependency responds.
    """
    try:
        # The Whisper model loads at startup; traffic is useless without it.
        if whisper_model is None:
            return jsonify({'status': 'not_ready', 'reason': 'Whisper model not loaded'}), 503
        # A quick round-trip verifies the Ollama translation backend is reachable.
        ollama.list()
    except Exception as e:
        return jsonify({'status': 'not_ready', 'reason': str(e)}), 503
    return jsonify({'status': 'ready', 'timestamp': time.time()})
@app.route('/health/live', methods=['GET'])
def liveness_check():
    """Liveness probe - confirms the process is alive and answering HTTP."""
    body = {'status': 'alive', 'timestamp': time.time()}
    return jsonify(body)
# Attach start time and a request counter to the app object so that
# /health/detailed can report uptime and traffic metrics.
app.start_time = time.time()
app.request_count = 0


@app.before_request
def before_request():
    """Count every incoming request for the health-metrics endpoint."""
    current = getattr(app, 'request_count', 0)
    app.request_count = current + 1
# Script entry point: run the Flask development server on all interfaces.
# NOTE(review): debug=True is unsafe for production deployments -- confirm
# this module is only launched this way in development.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5005, debug=True)

91
health-monitor.py Executable file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""
Health monitoring script for Talk2Me application
Usage: python health-monitor.py [--detailed] [--interval SECONDS]
"""
import requests
import time
import argparse
import json
from datetime import datetime
def check_health(url, detailed=False):
    """Poll the Talk2Me health endpoint once and print a status report.

    Args:
        url: Base URL of the service, without a trailing slash.
        detailed: When True, query /health/detailed and print per-component
            status plus metrics; otherwise query /health for a one-line summary.

    Returns:
        True when the service responded with HTTP 200, False otherwise
        (including connection errors and timeouts).
    """
    endpoint = f"{url}/health/detailed" if detailed else f"{url}/health"
    try:
        response = requests.get(endpoint, timeout=5)
        data = response.json()
        if detailed:
            print(f"\n=== Health Check at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ===")
            print(f"Overall Status: {data['status'].upper()}")
            print("\nComponent Status:")
            for component, status in data['components'].items():
                # The original icon literals were empty strings (garbled
                # emoji), so the report printed no pass/fail marker at all.
                status_icon = "[OK]" if status.get('status') == 'healthy' else "[FAIL]"
                print(f"  {status_icon} {component}: {status.get('status', 'unknown')}")
                if 'error' in status:
                    print(f"      Error: {status['error']}")
                if 'device' in status:
                    print(f"      Device: {status['device']}")
                if 'model_size' in status:
                    print(f"      Model: {status['model_size']}")
            if 'metrics' in data:
                print("\nMetrics:")
                uptime = data['metrics'].get('uptime', 0)
                hours = int(uptime // 3600)
                minutes = int((uptime % 3600) // 60)
                print(f"  Uptime: {hours}h {minutes}m")
                print(f"  Request Count: {data['metrics'].get('request_count', 0)}")
        else:
            status_icon = "[OK]" if response.status_code == 200 else "[FAIL]"
            print(f"{status_icon} {datetime.now().strftime('%H:%M:%S')} - Status: {data.get('status', 'unknown')}")
        return response.status_code == 200
    except requests.exceptions.ConnectionError:
        print(f"{datetime.now().strftime('%H:%M:%S')} - Connection failed")
        return False
    except requests.exceptions.Timeout:
        print(f"{datetime.now().strftime('%H:%M:%S')} - Request timeout")
        return False
    except Exception as e:
        # Covers malformed JSON / missing keys in an unexpected response body.
        print(f"{datetime.now().strftime('%H:%M:%S')} - Error: {str(e)}")
        return False
def main():
    """Parse CLI options and poll the service until interrupted."""
    parser = argparse.ArgumentParser(description='Monitor Talk2Me service health')
    parser.add_argument('--url', default='http://localhost:5005', help='Service URL')
    parser.add_argument('--detailed', action='store_true', help='Show detailed health info')
    parser.add_argument('--interval', type=int, default=30, help='Check interval in seconds')
    parser.add_argument('--once', action='store_true', help='Run once and exit')
    args = parser.parse_args()

    print(f"Monitoring {args.url}")
    print("Press Ctrl+C to stop\n")

    # Alert only after several failures in a row to avoid flapping on a
    # single dropped request.
    consecutive_failures = 0
    try:
        while True:
            if check_health(args.url, args.detailed):
                consecutive_failures = 0
            else:
                consecutive_failures += 1
                if consecutive_failures >= 3:
                    print(f"\n⚠️ ALERT: Service has been down for {consecutive_failures} consecutive checks!")
            if args.once:
                break
            time.sleep(args.interval)
    except KeyboardInterrupt:
        print("\n\nMonitoring stopped.")


if __name__ == "__main__":
    main()

View File

@ -107,6 +107,9 @@ function initApp(): void {
// Initialize queue status updates
initQueueStatus();
// Start health monitoring
startHealthMonitoring();
// Update TTS server URL and API key
updateTtsServer.addEventListener('click', function() {
const newUrl = ttsServerUrl.value.trim();
@ -153,6 +156,11 @@ function initApp(): void {
// Event listeners for language selection
sourceLanguage.addEventListener('change', function() {
// Skip conflict check for auto-detect
if (sourceLanguage.value === 'auto') {
return;
}
if (targetLanguage.value === sourceLanguage.value) {
for (let i = 0; i < targetLanguage.options.length; i++) {
if (targetLanguage.options[i].value !== sourceLanguage.value) {
@ -383,10 +391,24 @@ function initApp(): void {
if (data.success && data.text) {
currentSourceText = data.text;
// Handle auto-detected language
if (data.detected_language && sourceLanguage.value === 'auto') {
// Update the source language selector
sourceLanguage.value = data.detected_language;
// Show detected language info
sourceText.innerHTML = `<p class="fade-in">${data.text}</p>
<small class="text-muted">Detected language: ${data.detected_language}</small>`;
statusIndicator.textContent = `Transcription complete (${data.detected_language} detected)`;
} else {
sourceText.innerHTML = `<p class="fade-in">${data.text}</p>`;
statusIndicator.textContent = 'Transcription complete';
}
playSource.disabled = false;
translateBtn.disabled = false;
statusIndicator.textContent = 'Transcription complete';
statusIndicator.classList.remove('processing');
statusIndicator.classList.add('success');
setTimeout(() => statusIndicator.classList.remove('success'), 2000);
@ -394,7 +416,7 @@ function initApp(): void {
// Cache the transcription in IndexedDB
saveToIndexedDB('transcriptions', {
text: data.text,
language: sourceLanguage.value,
language: data.detected_language || sourceLanguage.value,
timestamp: new Date().toISOString()
} as TranscriptionRecord);
} else {
@ -753,6 +775,99 @@ function initApp(): void {
// Initial update
updateQueueDisplay();
}
// Health monitoring and auto-recovery.
// Polls GET /health every 30 seconds; after 3 consecutive failures it shows
// a dismissible warning banner and attempts client-side recovery (clearing
// stuck queued requests, re-checking the TTS server, refreshing the service
// worker).
function startHealthMonitoring(): void {
    let consecutiveFailures = 0;
    const maxConsecutiveFailures = 3;

    // One poll of the backend /health endpoint.
    // Success resets the failure counter and hides any visible warning.
    async function checkHealth(): Promise<void> {
        try {
            const response = await fetch('/health', {
                method: 'GET',
                signal: AbortSignal.timeout(5000) // 5 second timeout
            });
            if (response.ok) {
                consecutiveFailures = 0;
                // Remove any health warning if shown
                const healthWarning = document.getElementById('healthWarning');
                if (healthWarning) {
                    healthWarning.style.display = 'none';
                }
            } else {
                handleHealthCheckFailure();
            }
        } catch (error) {
            // Network error or the 5s AbortSignal timeout fired.
            handleHealthCheckFailure();
        }
    }

    // Tally a failed poll; once the threshold is reached, warn the user and
    // try to recover.  NOTE(review): recovery re-runs on every failed poll
    // past the threshold until the 30s reset below clears the counter.
    function handleHealthCheckFailure(): void {
        consecutiveFailures++;
        console.warn(`Health check failed (${consecutiveFailures}/${maxConsecutiveFailures})`);
        if (consecutiveFailures >= maxConsecutiveFailures) {
            showHealthWarning();
            // Attempt auto-recovery
            attemptAutoRecovery();
        }
    }

    // Lazily create (then reuse) a fixed-position Bootstrap alert banner.
    function showHealthWarning(): void {
        let healthWarning = document.getElementById('healthWarning');
        if (!healthWarning) {
            healthWarning = document.createElement('div');
            healthWarning.id = 'healthWarning';
            healthWarning.className = 'alert alert-warning alert-dismissible fade show position-fixed top-0 start-50 translate-middle-x mt-3';
            healthWarning.style.zIndex = '9999';
            healthWarning.innerHTML = `
                <i class="fas fa-exclamation-triangle"></i> Service health check failed.
                Some features may be unavailable.
                <button type="button" class="btn-close" data-bs-dismiss="alert"></button>
            `;
            document.body.appendChild(healthWarning);
        }
        healthWarning.style.display = 'block';
    }

    // Best-effort client-side recovery: drop stuck requests, re-probe the
    // TTS server, and refresh the service worker registration if present.
    async function attemptAutoRecovery(): Promise<void> {
        console.log('Attempting auto-recovery...');
        // Clear any stuck requests in the queue
        const queue = RequestQueueManager.getInstance();
        queue.clearStuckRequests();
        // Re-check TTS server
        checkTtsServer();
        // Try to reload service worker if available
        if ('serviceWorker' in navigator) {
            try {
                const registration = await navigator.serviceWorker.getRegistration();
                if (registration) {
                    await registration.update();
                    console.log('Service worker updated');
                }
            } catch (error) {
                console.error('Failed to update service worker:', error);
            }
        }
        // Reset failure counter after recovery attempt
        setTimeout(() => {
            consecutiveFailures = 0;
        }, 30000); // Wait 30 seconds before resetting
    }

    // Check health every 30 seconds
    setInterval(checkHealth, 30000);
    // Initial health check after 5 seconds
    setTimeout(checkHealth, 5000);
}
}

View File

@ -217,6 +217,31 @@ export class RequestQueueManager {
this.queue = [];
}
// Clear stuck requests (requests older than 60 seconds)
clearStuckRequests(): void {
const now = Date.now();
const stuckThreshold = 60000; // 60 seconds
// Clear stuck active requests
this.activeRequests.forEach((request, id) => {
if (now - request.timestamp > stuckThreshold) {
console.warn(`Clearing stuck active request: ${request.type}`);
request.reject(new Error('Request timeout - cleared by recovery'));
this.activeRequests.delete(id);
}
});
// Clear old queued requests
this.queue = this.queue.filter(request => {
if (now - request.timestamp > stuckThreshold) {
console.warn(`Clearing stuck queued request: ${request.type}`);
request.reject(new Error('Request timeout - cleared by recovery'));
return false;
}
return true;
});
}
// Update settings
updateSettings(settings: {
maxConcurrent?: number;

View File

@ -4,6 +4,7 @@ export interface TranscriptionResponse {
success: boolean;
text?: string;
error?: string;
detected_language?: string;
}
export interface TranslationResponse {

View File

@ -133,6 +133,7 @@
</div>
<div class="card-body">
<select id="sourceLanguage" class="form-select language-select mb-3">
<option value="auto">Auto-detect</option>
{% for language in languages %}
<option value="{{ language }}">{{ language }}</option>
{% endfor %}