Health Check Features (Item 12):
- Added /health endpoint for basic health monitoring
- Added /health/detailed for comprehensive component status
- Added /health/ready for Kubernetes readiness probes
- Added /health/live for liveness checks
- Frontend health monitoring with auto-recovery
- Clear stuck requests after 60 seconds
- Visual health warnings when service is degraded
- Monitoring script for external health checks

Automatic Language Detection (Item 13):
- Added "Auto-detect" option in source language dropdown
- Whisper automatically detects language when auto-detect is selected
- Shows detected language in UI after transcription
- Updates language selector with detected language
- Caches transcriptions with correct detected language

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
91 lines
3.5 KiB
Python
Executable File
91 lines
3.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Health monitoring script for Talk2Me application
|
|
Usage: python health-monitor.py [--url URL] [--detailed] [--interval SECONDS] [--once]
|
|
"""
|
|
|
|
import requests
|
|
import time
|
|
import argparse
|
|
import json
|
|
from datetime import datetime
|
|
|
|
def _print_detailed_report(data):
    """Print the report for a ``/health/detailed`` JSON payload.

    Missing ``status`` or ``components`` keys are reported as ``unknown`` /
    an empty component list instead of raising ``KeyError``, so a partial
    payload still produces a readable report.

    Args:
        data: Decoded JSON object returned by the detailed health endpoint.
    """
    print(f"\n=== Health Check at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ===")
    print(f"Overall Status: {str(data.get('status', 'unknown')).upper()}")
    print("\nComponent Status:")

    # Optional per-component fields and the label each one is printed under.
    optional_fields = (('error', 'Error'), ('device', 'Device'), ('model_size', 'Model'))

    for component, status in (data.get('components') or {}).items():
        status_icon = "✅" if status.get('status') == 'healthy' else "❌"
        print(f" {status_icon} {component}: {status.get('status', 'unknown')}")
        for key, label in optional_fields:
            if key in status:
                print(f" {label}: {status[key]}")

    if 'metrics' in data:
        metrics = data['metrics']
        print("\nMetrics:")
        uptime = metrics.get('uptime', 0)
        hours = int(uptime // 3600)
        minutes = int((uptime % 3600) // 60)
        print(f" Uptime: {hours}h {minutes}m")
        print(f" Request Count: {metrics.get('request_count', 0)}")


def check_health(url, detailed=False):
    """Check health of the Talk2Me service and print the result.

    Args:
        url: Base URL of the service; a trailing slash is tolerated.
        detailed: When true, query ``/health/detailed`` and print a
            per-component report; otherwise query ``/health`` and print a
            one-line summary.

    Returns:
        True when the endpoint answered with HTTP 200, False for any other
        status code or when the request or response handling failed.
        No exception is propagated to the caller.
    """
    # Strip trailing slashes so "http://host/" does not become "http://host//health".
    base_url = url.rstrip('/')
    endpoint = f"{base_url}/health/detailed" if detailed else f"{base_url}/health"

    try:
        response = requests.get(endpoint, timeout=5)
        data = response.json()
        healthy = response.status_code == 200

        if detailed:
            _print_detailed_report(data)
        else:
            status_icon = "✅" if healthy else "❌"
            print(f"{status_icon} {datetime.now().strftime('%H:%M:%S')} - Status: {data.get('status', 'unknown')}")

        return healthy

    except requests.exceptions.ConnectionError:
        print(f"❌ {datetime.now().strftime('%H:%M:%S')} - Connection failed")
        return False
    except requests.exceptions.Timeout:
        print(f"❌ {datetime.now().strftime('%H:%M:%S')} - Request timeout")
        return False
    except Exception as e:
        # Deliberate catch-all: a malformed body (non-JSON, unexpected shape)
        # must count as a failed check rather than kill the monitoring loop.
        print(f"❌ {datetime.now().strftime('%H:%M:%S')} - Error: {str(e)}")
        return False
|
|
|
|
def main():
    """Parse command-line options and poll the service health endpoint.

    Options:
        --url       Base URL of the service (default http://localhost:5005).
        --detailed  Print the per-component report on every check.
        --interval  Seconds to wait between checks (default 30, must be > 0).
        --once      Perform a single check and return.

    Returns:
        Exit code for the process. With ``--once`` it is 0 when the check
        succeeded and 1 when it failed, so the script can be used from cron
        jobs or shell probes. In continuous mode it is 0 after Ctrl+C.
    """
    parser = argparse.ArgumentParser(description='Monitor Talk2Me service health')
    parser.add_argument('--url', default='http://localhost:5005', help='Service URL')
    parser.add_argument('--detailed', action='store_true', help='Show detailed health info')
    parser.add_argument('--interval', type=int, default=30, help='Check interval in seconds')
    parser.add_argument('--once', action='store_true', help='Run once and exit')

    args = parser.parse_args()

    # A zero interval would hammer the service in a busy loop and a negative
    # one makes time.sleep() raise ValueError; reject both with a usage error.
    if args.interval <= 0:
        parser.error('--interval must be a positive number of seconds')

    print(f"Monitoring {args.url}")
    print("Press Ctrl+C to stop\n")

    consecutive_failures = 0

    try:
        while True:
            if check_health(args.url, args.detailed):
                consecutive_failures = 0
            else:
                consecutive_failures += 1
                if consecutive_failures >= 3:
                    print(f"\n⚠️ ALERT: Service has been down for {consecutive_failures} consecutive checks!")

            if args.once:
                # Report the outcome of the single check through the exit code.
                return 0 if consecutive_failures == 0 else 1

            time.sleep(args.interval)
    except KeyboardInterrupt:
        print("\n\nMonitoring stopped.")

    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())