From 0c9186e57e5093e50caa1817b6998cb9c5cd811c Mon Sep 17 00:00:00 2001
From: Adolfo Delorenzo
Date: Mon, 2 Jun 2025 22:37:38 -0600
Subject: [PATCH] Add health check endpoints and automatic language detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Health Check Features (Item 12):
- Added /health endpoint for basic health monitoring
- Added /health/detailed for comprehensive component status
- Added /health/ready for Kubernetes readiness probes
- Added /health/live for liveness checks
- Frontend health monitoring with auto-recovery
- Clear stuck requests after 60 seconds
- Visual health warnings when service is degraded
- Monitoring script for external health checks

Automatic Language Detection (Item 13):
- Added "Auto-detect" option in source language dropdown
- Whisper automatically detects language when auto-detect is selected
- Shows detected language in UI after transcription
- Updates language selector with detected language
- Caches transcriptions with correct detected language

šŸ¤– Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 app.py                        | 149 +++++++++++++++++++++++++++++++++-
 health-monitor.py             |  91 +++++++++++++++++++++
 static/js/src/app.ts          | 121 ++++++++++++++++++++++++++-
 static/js/src/requestQueue.ts |  25 ++++++
 static/js/src/types.ts        |   1 +
 templates/index.html          |   1 +
 6 files changed, 382 insertions(+), 6 deletions(-)
 create mode 100755 health-monitor.py

diff --git a/app.py b/app.py
index 7a5c00e..e5f10fd 100644
--- a/app.py
+++ b/app.py
@@ -402,9 +402,11 @@ def transcribe():
     audio_file.save(temp_path)
 
     try:
+        # Check if we should auto-detect language
+        auto_detect = source_lang == 'auto' or source_lang == ''
+
         # Use Whisper for transcription with GPU optimizations
         transcribe_options = {
-            "language": LANGUAGE_TO_CODE.get(source_lang, None),
             "task": "transcribe",
             "temperature": 0,  # Disable temperature sampling for faster inference
             "best_of": 1,  # Disable beam search for faster inference
@@ -416,6 +418,10 @@ def transcribe():
             "no_speech_threshold": 0.6
         }
 
+        # Only set language if not auto-detecting
+        if not auto_detect:
+            transcribe_options["language"] = LANGUAGE_TO_CODE.get(source_lang, None)
+
         # Clear GPU cache before transcription
         if device.type == 'cuda':
             torch.cuda.empty_cache()
@@ -428,6 +434,19 @@ def transcribe():
         )
 
         transcribed_text = result["text"]
+
+        # Get detected language if auto-detection was used
+        detected_language = None
+        if auto_detect and 'language' in result:
+            # Convert language code back to full name
+            detected_code = result['language']
+            for lang_name, lang_code in LANGUAGE_TO_CODE.items():
+                if lang_code == detected_code:
+                    detected_language = lang_name
+                    break
+
+            # Log detected language
+            logger.info(f"Auto-detected language: {detected_language} ({detected_code})")
 
         # Send notification if push is enabled
         if len(push_subscriptions) > 0:
@@ -437,10 +456,16 @@ def transcribe():
                 tag="transcription-complete"
             )
 
-        return jsonify({
+        response = {
             'success': True,
             'text': transcribed_text
-        })
+        }
+
+        # Include detected language if auto-detection was used
+        if detected_language:
+            response['detected_language'] = detected_language
+
+        return jsonify(response)
     except Exception as e:
         logger.error(f"Transcription error: {str(e)}")
         return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
@@ -598,5 +623,123 @@ def get_audio(filename):
         logger.error(f"Audio retrieval error: {str(e)}")
         return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500
 
+# Health check endpoints for monitoring
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Basic health check endpoint"""
+    return jsonify({
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'service': 'voice-translator'
+    })
+
+@app.route('/health/detailed', methods=['GET'])
+def detailed_health_check():
+    """Detailed health check with component status"""
+    health_status = {
+        'status': 'healthy',
+        'timestamp': time.time(),
+        'components': {
+            'whisper': {'status': 'unknown'},
+            'ollama': {'status': 'unknown'},
+            'tts': {'status': 'unknown'},
+            'gpu': {'status': 'unknown'}
+        },
+        'metrics': {}
+    }
+
+    # Check Whisper model
+    try:
+        if whisper_model is not None:
+            health_status['components']['whisper']['status'] = 'healthy'
+            health_status['components']['whisper']['model_size'] = MODEL_SIZE
+        else:
+            health_status['components']['whisper']['status'] = 'unhealthy'
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['whisper']['status'] = 'unhealthy'
+        health_status['components']['whisper']['error'] = str(e)
+        health_status['status'] = 'unhealthy'
+
+    # Check GPU availability
+    try:
+        if torch.cuda.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = torch.cuda.get_device_name(0)
+            health_status['components']['gpu']['memory_allocated'] = f"{torch.cuda.memory_allocated(0) / 1024**2:.2f} MB"
+            health_status['components']['gpu']['memory_reserved'] = f"{torch.cuda.memory_reserved(0) / 1024**2:.2f} MB"
+        elif torch.backends.mps.is_available():
+            health_status['components']['gpu']['status'] = 'healthy'
+            health_status['components']['gpu']['device'] = 'Apple Silicon GPU'
+        else:
+            health_status['components']['gpu']['status'] = 'not_available'
+            health_status['components']['gpu']['device'] = 'CPU'
+    except Exception as e:
+        health_status['components']['gpu']['status'] = 'error'
+        health_status['components']['gpu']['error'] = str(e)
+
+    # Check Ollama connection
+    try:
+        ollama_models = ollama.list()
+        health_status['components']['ollama']['status'] = 'healthy'
+        health_status['components']['ollama']['available_models'] = len(ollama_models.get('models', []))
+    except Exception as e:
+        health_status['components']['ollama']['status'] = 'unhealthy'
+        health_status['components']['ollama']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Check TTS server
+    try:
+        tts_response = requests.get(app.config['TTS_SERVER'].replace('/v1/audio/speech', '/health'), timeout=5)
+        if tts_response.status_code == 200:
+            health_status['components']['tts']['status'] = 'healthy'
+            health_status['components']['tts']['server_url'] = app.config['TTS_SERVER']
+        else:
+            health_status['components']['tts']['status'] = 'unhealthy'
+            health_status['components']['tts']['http_status'] = tts_response.status_code
+            health_status['status'] = 'degraded'
+    except Exception as e:
+        health_status['components']['tts']['status'] = 'unhealthy'
+        health_status['components']['tts']['error'] = str(e)
+        health_status['status'] = 'degraded'
+
+    # Add system metrics
+    health_status['metrics']['uptime'] = time.time() - app.start_time if hasattr(app, 'start_time') else 0
+    health_status['metrics']['request_count'] = getattr(app, 'request_count', 0)
+
+    # Set appropriate HTTP status code
+    http_status = 200 if health_status['status'] == 'healthy' else 503 if health_status['status'] == 'unhealthy' else 200
+
+    return jsonify(health_status), http_status
+
+@app.route('/health/ready', methods=['GET'])
+def readiness_check():
+    """Readiness probe - checks if service is ready to accept traffic"""
+    try:
+        # Check if all critical components are loaded
+        if whisper_model is None:
+            return jsonify({'status': 'not_ready', 'reason': 'Whisper model not loaded'}), 503
+
+        # Check Ollama connection
+        ollama.list()
+
+        return jsonify({'status': 'ready', 'timestamp': time.time()})
+    except Exception as e:
+        return jsonify({'status': 'not_ready', 'reason': str(e)}), 503
+
+@app.route('/health/live', methods=['GET'])
+def liveness_check():
+    """Liveness probe - basic check to see if process is alive"""
+    return jsonify({'status': 'alive', 'timestamp': time.time()})
+
+# Initialize app start time for metrics
+app.start_time = time.time()
+app.request_count = 0
+
+# Middleware to count requests
+@app.before_request
+def before_request():
+    app.request_count = getattr(app, 'request_count', 0) + 1
+
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5005, debug=True)

diff --git a/health-monitor.py b/health-monitor.py
new file mode 100755
index 0000000..d490f09
--- /dev/null
+++ b/health-monitor.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Health monitoring script for Talk2Me application
+Usage: python health-monitor.py [--detailed] [--interval SECONDS]
+"""
+
+import requests
+import time
+import argparse
+import json
+from datetime import datetime
+
+def check_health(url, detailed=False):
+    """Check health of the Talk2Me service"""
+    endpoint = f"{url}/health/detailed" if detailed else f"{url}/health"
+
+    try:
+        response = requests.get(endpoint, timeout=5)
+        data = response.json()
+
+        if detailed:
+            print(f"\n=== Health Check at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ===")
+            print(f"Overall Status: {data['status'].upper()}")
+            print("\nComponent Status:")
+            for component, status in data['components'].items():
+                status_icon = "āœ…" if status.get('status') == 'healthy' else "āŒ"
+                print(f"  {status_icon} {component}: {status.get('status', 'unknown')}")
+                if 'error' in status:
+                    print(f"      Error: {status['error']}")
+                if 'device' in status:
+                    print(f"      Device: {status['device']}")
+                if 'model_size' in status:
+                    print(f"      Model: {status['model_size']}")
+
+            if 'metrics' in data:
+                print("\nMetrics:")
+                uptime = data['metrics'].get('uptime', 0)
+                hours = int(uptime // 3600)
+                minutes = int((uptime % 3600) // 60)
+                print(f"  Uptime: {hours}h {minutes}m")
+                print(f"  Request Count: {data['metrics'].get('request_count', 0)}")
+        else:
+            status_icon = "āœ…" if response.status_code == 200 else "āŒ"
+            print(f"{status_icon} {datetime.now().strftime('%H:%M:%S')} - Status: {data.get('status', 'unknown')}")
+
+        return response.status_code == 200
+
+    except requests.exceptions.ConnectionError:
+        print(f"āŒ {datetime.now().strftime('%H:%M:%S')} - Connection failed")
+        return False
+    except requests.exceptions.Timeout:
+        print(f"āŒ {datetime.now().strftime('%H:%M:%S')} - Request timeout")
+        return False
+    except Exception as e:
+        print(f"āŒ {datetime.now().strftime('%H:%M:%S')} - Error: {str(e)}")
+        return False
+
+def main():
+    parser = argparse.ArgumentParser(description='Monitor Talk2Me service health')
+    parser.add_argument('--url', default='http://localhost:5005', help='Service URL')
+    parser.add_argument('--detailed', action='store_true', help='Show detailed health info')
+    parser.add_argument('--interval', type=int, default=30, help='Check interval in seconds')
+    parser.add_argument('--once', action='store_true', help='Run once and exit')
+
+    args = parser.parse_args()
+
+    print(f"Monitoring {args.url}")
+    print("Press Ctrl+C to stop\n")
+
+    consecutive_failures = 0
+
+    try:
+        while True:
+            success = check_health(args.url, args.detailed)
+
+            if not success:
+                consecutive_failures += 1
+                if consecutive_failures >= 3:
+                    print(f"\nāš ļø  ALERT: Service has been down for {consecutive_failures} consecutive checks!")
+            else:
+                consecutive_failures = 0
+
+            if args.once:
+                break
+
+            time.sleep(args.interval)
+    except KeyboardInterrupt:
+        print("\n\nMonitoring stopped.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

diff --git a/static/js/src/app.ts b/static/js/src/app.ts
index 7ad1b1a..dd2d2a4 100644
--- a/static/js/src/app.ts
+++ b/static/js/src/app.ts
@@ -106,6 +106,9 @@ function initApp(): void {
 
     // Initialize queue status updates
     initQueueStatus();
+
+    // Start health monitoring
+    startHealthMonitoring();
 
     // Update TTS server URL and API key
     updateTtsServer.addEventListener('click', function() {
@@ -153,6 +156,11 @@ function initApp(): void {
 
     // Event listeners for language selection
     sourceLanguage.addEventListener('change', function() {
+        // Skip conflict check for auto-detect
+        if (sourceLanguage.value === 'auto') {
+            return;
+        }
+
         if (targetLanguage.value === sourceLanguage.value) {
             for (let i = 0; i < targetLanguage.options.length; i++) {
                 if (targetLanguage.options[i].value !== sourceLanguage.value) {
@@ -383,10 +391,24 @@ function initApp(): void {
 
                 if (data.success && data.text) {
                     currentSourceText = data.text;
-                    sourceText.innerHTML = `${data.text}`;
+
+                    // Handle auto-detected language
+                    if (data.detected_language && sourceLanguage.value === 'auto') {
+                        // Update the source language selector
+                        sourceLanguage.value = data.detected_language;
+
+                        // Show detected language info
+                        sourceText.innerHTML = `${data.text}
+                            Detected language: ${data.detected_language}`;
+
+                        statusIndicator.textContent = `Transcription complete (${data.detected_language} detected)`;
+                    } else {
+                        sourceText.innerHTML = `${data.text}`;
+                        statusIndicator.textContent = 'Transcription complete';
+                    }
+
                     playSource.disabled = false;
                     translateBtn.disabled = false;
-                    statusIndicator.textContent = 'Transcription complete';
                     statusIndicator.classList.remove('processing');
                     statusIndicator.classList.add('success');
                     setTimeout(() => statusIndicator.classList.remove('success'), 2000);
@@ -394,7 +416,7 @@ function initApp(): void {
                     // Cache the transcription in IndexedDB
                     saveToIndexedDB('transcriptions', {
                         text: data.text,
-                        language: sourceLanguage.value,
+                        language: data.detected_language || sourceLanguage.value,
                         timestamp: new Date().toISOString()
                     } as TranscriptionRecord);
                 } else {
@@ -753,6 +775,99 @@ function initApp(): void {
 
        // Initial update
        updateQueueDisplay();
    }
+
+    // Health monitoring and auto-recovery
+    function startHealthMonitoring(): void {
+        let consecutiveFailures = 0;
+        const maxConsecutiveFailures = 3;
+
+        async function checkHealth(): Promise<void> {
+            try {
+                const response = await fetch('/health', {
+                    method: 'GET',
+                    signal: AbortSignal.timeout(5000) // 5 second timeout
+                });
+
+                if (response.ok) {
+                    consecutiveFailures = 0;
+
+                    // Remove any health warning if shown
+                    const healthWarning = document.getElementById('healthWarning');
+                    if (healthWarning) {
+                        healthWarning.style.display = 'none';
+                    }
+                } else {
+                    handleHealthCheckFailure();
+                }
+            } catch (error) {
+                handleHealthCheckFailure();
+            }
+        }
+
+        function handleHealthCheckFailure(): void {
+            consecutiveFailures++;
+            console.warn(`Health check failed (${consecutiveFailures}/${maxConsecutiveFailures})`);
+
+            if (consecutiveFailures >= maxConsecutiveFailures) {
+                showHealthWarning();
+
+                // Attempt auto-recovery
+                attemptAutoRecovery();
+            }
+        }
+
+        function showHealthWarning(): void {
+            let healthWarning = document.getElementById('healthWarning');
+            if (!healthWarning) {
+                healthWarning = document.createElement('div');
+                healthWarning.id = 'healthWarning';
+                healthWarning.className = 'alert alert-warning alert-dismissible fade show position-fixed top-0 start-50 translate-middle-x mt-3';
+                healthWarning.style.zIndex = '9999';
+                healthWarning.innerHTML = `
+                    Service health check failed.
+                    Some features may be unavailable.
+
+                `;
+                document.body.appendChild(healthWarning);
+            }
+            healthWarning.style.display = 'block';
+        }
+
+        async function attemptAutoRecovery(): Promise<void> {
+            console.log('Attempting auto-recovery...');
+
+            // Clear any stuck requests in the queue
+            const queue = RequestQueueManager.getInstance();
+            queue.clearStuckRequests();
+
+            // Re-check TTS server
+            checkTtsServer();
+
+            // Try to reload service worker if available
+            if ('serviceWorker' in navigator) {
+                try {
+                    const registration = await navigator.serviceWorker.getRegistration();
+                    if (registration) {
+                        await registration.update();
+                        console.log('Service worker updated');
+                    }
+                } catch (error) {
+                    console.error('Failed to update service worker:', error);
+                }
+            }
+
+            // Reset failure counter after recovery attempt
+            setTimeout(() => {
+                consecutiveFailures = 0;
+            }, 30000); // Wait 30 seconds before resetting
+        }
+
+        // Check health every 30 seconds
+        setInterval(checkHealth, 30000);
+
+        // Initial health check after 5 seconds
+        setTimeout(checkHealth, 5000);
+    }
 }

diff --git a/static/js/src/requestQueue.ts b/static/js/src/requestQueue.ts
index 0ddcc5c..03f5b84 100644
--- a/static/js/src/requestQueue.ts
+++ b/static/js/src/requestQueue.ts
@@ -216,6 +216,31 @@ export class RequestQueueManager {
        });
        this.queue = [];
    }
+
+    // Clear stuck requests (requests older than 60 seconds)
+    clearStuckRequests(): void {
+        const now = Date.now();
+        const stuckThreshold = 60000; // 60 seconds
+
+        // Clear stuck active requests
+        this.activeRequests.forEach((request, id) => {
+            if (now - request.timestamp > stuckThreshold) {
+                console.warn(`Clearing stuck active request: ${request.type}`);
+                request.reject(new Error('Request timeout - cleared by recovery'));
+                this.activeRequests.delete(id);
+            }
+        });
+
+        // Clear old queued requests
+        this.queue = this.queue.filter(request => {
+            if (now - request.timestamp > stuckThreshold) {
+                console.warn(`Clearing stuck queued request: ${request.type}`);
+                request.reject(new Error('Request timeout - cleared by recovery'));
+                return false;
+            }
+            return true;
+        });
+    }
 
    // Update settings
    updateSettings(settings: {

diff --git a/static/js/src/types.ts b/static/js/src/types.ts
index 096565d..aa76546 100644
--- a/static/js/src/types.ts
+++ b/static/js/src/types.ts
@@ -4,6 +4,7 @@ export interface TranscriptionResponse {
   success: boolean;
   text?: string;
   error?: string;
+  detected_language?: string;
 }
 
 export interface TranslationResponse {

diff --git a/templates/index.html b/templates/index.html
index c7391df..c8b0e4a 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -133,6 +133,7 @@
+                            <option value="auto">Auto-detect</option>
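
A note for reviewers: below is a minimal smoke test for the new health endpoints, not part of the patch itself. It is only a sketch; it assumes the default bind address and port from app.run() (0.0.0.0:5005) and relies on the endpoint paths and response fields ('status', 'components', 'metrics') defined in the handlers added above.

    import requests

    BASE_URL = "http://localhost:5005"  # assumed: default port from app.run() in app.py

    # /health, /health/live and /health/ready should all return 200 once the
    # Whisper model is loaded and Ollama is reachable.
    for path in ("/health", "/health/live", "/health/ready"):
        resp = requests.get(f"{BASE_URL}{path}", timeout=5)
        print(path, resp.status_code, resp.json().get("status"))

    # /health/detailed reports per-component status plus uptime/request metrics;
    # it responds with 503 only when the overall status is 'unhealthy'.
    detailed = requests.get(f"{BASE_URL}/health/detailed", timeout=5).json()
    for component, info in detailed["components"].items():
        print(f"{component}: {info.get('status')}")
    print("uptime seconds:", detailed["metrics"].get("uptime"))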
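
On the auto-detect side: when the client sends source_lang as 'auto' (or empty), the transcription response gains a detected_language field next to the existing ones, which the frontend uses to update the language selector and the IndexedDB cache. A sketch of the response shape as built in transcribe() above, with purely illustrative values:

    # Illustrative response for an auto-detected transcription (example values, not real output)
    example_auto_detect_response = {
        "success": True,
        "text": "transcribed speech goes here",
        "detected_language": "Spanish",  # full name mapped back from Whisper's language code
    }
    # When a concrete source language is selected, 'detected_language' is simply omitted.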