talk2me/health-monitor.py
Adolfo Delorenzo 0c9186e57e Add health check endpoints and automatic language detection
Health Check Features (Item 12):
- Added /health endpoint for basic health monitoring
- Added /health/detailed for comprehensive component status
- Added /health/ready for Kubernetes readiness probes
- Added /health/live for liveness checks
- Frontend health monitoring with auto-recovery
- Clear stuck requests after 60 seconds
- Visual health warnings when service is degraded
- Monitoring script for external health checks

Automatic Language Detection (Item 13):
- Added "Auto-detect" option in source language dropdown
- Whisper automatically detects language when auto-detect is selected
- Shows detected language in UI after transcription
- Updates language selector with detected language
- Caches transcriptions with correct detected language

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-02 22:37:38 -06:00

91 lines
3.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Health monitoring script for Talk2Me application
Usage: python health-monitor.py [--url URL] [--detailed] [--interval SECONDS] [--once]
"""
import requests
import time
import argparse
import json
from datetime import datetime
def check_health(url, detailed=False, timeout=5):
    """Check the health of the Talk2Me service and print a report.

    Sends a GET request to ``{url}/health`` (or ``{url}/health/detailed``
    when *detailed* is true), prints a human-readable summary of the JSON
    response, and reports whether the service answered with HTTP 200.

    Args:
        url: Base URL of the service, e.g. ``http://localhost:5005``.
            A trailing slash is tolerated.
        detailed: When true, query the detailed endpoint and print the
            per-component status and metrics.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if the endpoint responded with HTTP 200, otherwise False.
        Connection errors, timeouts and any other failure (including a
        response body that is not valid JSON) are reported on stdout and
        yield False instead of raising.
    """
    # Avoid "//health" when the caller passes a base URL ending in "/".
    base_url = url.rstrip('/')
    endpoint = f"{base_url}/health/detailed" if detailed else f"{base_url}/health"
    try:
        response = requests.get(endpoint, timeout=timeout)
        data = response.json()
        is_ok = response.status_code == 200
        if detailed:
            _print_detailed_report(data)
        else:
            status_icon = "✅" if is_ok else "❌"
            print(f"{status_icon} {datetime.now().strftime('%H:%M:%S')} - Status: {data.get('status', 'unknown')}")
        return is_ok
    except requests.exceptions.ConnectionError:
        print(f"{datetime.now().strftime('%H:%M:%S')} - Connection failed")
        return False
    except requests.exceptions.Timeout:
        print(f"{datetime.now().strftime('%H:%M:%S')} - Request timeout")
        return False
    except Exception as e:
        # Deliberate catch-all: a monitoring loop must survive any single
        # bad response (invalid JSON, unexpected payload shape, ...).
        print(f"{datetime.now().strftime('%H:%M:%S')} - Error: {str(e)}")
        return False


def _print_detailed_report(data):
    """Print overall status, per-component status and metrics from *data*."""
    print(f"\n=== Health Check at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ===")
    # Missing keys fall back to defaults so a partial payload still prints
    # whatever information it does contain.
    print(f"Overall Status: {str(data.get('status', 'unknown')).upper()}")
    print("\nComponent Status:")
    for component, status in data.get('components', {}).items():
        status_icon = "✅" if status.get('status') == 'healthy' else "❌"
        print(f" {status_icon} {component}: {status.get('status', 'unknown')}")
        if 'error' in status:
            print(f" Error: {status['error']}")
        if 'device' in status:
            print(f" Device: {status['device']}")
        if 'model_size' in status:
            print(f" Model: {status['model_size']}")
    if 'metrics' in data:
        print("\nMetrics:")
        uptime = data['metrics'].get('uptime', 0)
        hours = int(uptime // 3600)
        minutes = int((uptime % 3600) // 60)
        print(f" Uptime: {hours}h {minutes}m")
        print(f" Request Count: {data['metrics'].get('request_count', 0)}")
def main(argv=None):
    """Run the health monitor from the command line.

    Parses the options, then polls the service through ``check_health``
    every ``--interval`` seconds until interrupted with Ctrl+C, or exactly
    once when ``--once`` is given. An alert is printed on every check once
    three or more consecutive checks have failed.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            reads them from ``sys.argv[1:]``.

    Returns:
        Process exit code: 1 when ``--once`` was given and the check failed,
        otherwise 0.

    Raises:
        SystemExit: Raised by argparse on invalid arguments (exit code 2),
            including a negative ``--interval``.
    """
    parser = argparse.ArgumentParser(description='Monitor Talk2Me service health')
    parser.add_argument('--url', default='http://localhost:5005', help='Service URL')
    parser.add_argument('--detailed', action='store_true', help='Show detailed health info')
    parser.add_argument('--interval', type=int, default=30, help='Check interval in seconds')
    parser.add_argument('--once', action='store_true', help='Run once and exit')
    args = parser.parse_args(argv)
    if args.interval < 0:
        # time.sleep() raises ValueError on negative values; fail fast with
        # a usage message instead of crashing after the first check.
        parser.error('--interval must not be negative')

    print(f"Monitoring {args.url}")
    print("Press Ctrl+C to stop\n")
    consecutive_failures = 0
    exit_code = 0
    try:
        while True:
            success = check_health(args.url, args.detailed)
            if not success:
                consecutive_failures += 1
                if consecutive_failures >= 3:
                    print(f"\n⚠️ ALERT: Service has been down for {consecutive_failures} consecutive checks!")
            else:
                consecutive_failures = 0
            if args.once:
                # Let external probes (cron, CI, orchestrators) see the
                # outcome of a single-shot check through the exit status.
                exit_code = 0 if success else 1
                break
            time.sleep(args.interval)
    except KeyboardInterrupt:
        print("\n\nMonitoring stopped.")
    return exit_code


# Run only when executed as a script; propagate main()'s exit code.
if __name__ == "__main__":
    raise SystemExit(main())