#!/usr/bin/env python3
"""
Health monitoring script for Talk2Me application

Usage: python health-monitor.py [--url URL] [--detailed] [--interval SECONDS] [--once]
"""

import argparse
import json
import time
from datetime import datetime

import requests


def _timestamp(fmt='%H:%M:%S'):
    """Return the current local time formatted with *fmt*."""
    return datetime.now().strftime(fmt)


def _positive_int(value):
    """argparse ``type=`` callable: parse *value* as an integer >= 1.

    ``time.sleep`` raises ``ValueError`` for negative durations and a zero
    interval would poll the service in a tight loop, so both are rejected
    at argument-parsing time with a regular usage error.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def _print_detailed_report(data):
    """Print the overall status, per-component status and metrics from *data*.

    *data* is the decoded JSON body of the detailed health endpoint. Missing
    'status' or 'components' keys are reported as 'unknown' / an empty
    component list instead of aborting the report.
    """
    print(f"\n=== Health Check at {_timestamp('%Y-%m-%d %H:%M:%S')} ===")
    print(f"Overall Status: {str(data.get('status', 'unknown')).upper()}")

    print("\nComponent Status:")
    for component, status in data.get('components', {}).items():
        status_icon = "✅" if status.get('status') == 'healthy' else "❌"
        print(f" {status_icon} {component}: {status.get('status', 'unknown')}")
        # Optional per-component details, shown only when the service sent them.
        if 'error' in status:
            print(f" Error: {status['error']}")
        if 'device' in status:
            print(f" Device: {status['device']}")
        if 'model_size' in status:
            print(f" Model: {status['model_size']}")

    if 'metrics' in data:
        print("\nMetrics:")
        uptime = data['metrics'].get('uptime', 0)
        hours = int(uptime // 3600)
        minutes = int((uptime % 3600) // 60)
        print(f" Uptime: {hours}h {minutes}m")
        print(f" Request Count: {data['metrics'].get('request_count', 0)}")


def check_health(url, detailed=False):
    """Check health of the Talk2Me service and print the result.

    Args:
        url: Base URL of the service. A trailing slash is tolerated.
        detailed: When true, query ``/health/detailed`` and print a
            per-component report; otherwise query ``/health`` and print a
            single status line.

    Returns:
        True if the endpoint answered with HTTP 200 and a JSON body,
        False otherwise. Request and parsing errors are printed rather
        than raised.
    """
    try:
        # Strip trailing slashes so "http://host/" does not yield "//health".
        base_url = url.rstrip('/')
        endpoint = f"{base_url}/health/detailed" if detailed else f"{base_url}/health"

        response = requests.get(endpoint, timeout=5)

        try:
            data = response.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses; a non-JSON body
            # (e.g. a proxy error page) is reported with its HTTP status.
            print(f"❌ {_timestamp()} - Invalid JSON response (HTTP {response.status_code})")
            return False

        if detailed:
            _print_detailed_report(data)
        else:
            status_icon = "✅" if response.status_code == 200 else "❌"
            print(f"{status_icon} {_timestamp()} - Status: {data.get('status', 'unknown')}")

        return response.status_code == 200

    except requests.exceptions.ConnectionError:
        print(f"❌ {_timestamp()} - Connection failed")
        return False
    except requests.exceptions.Timeout:
        print(f"❌ {_timestamp()} - Request timeout")
        return False
    except Exception as e:
        # Top-level boundary: the monitor loop must survive any single failed check.
        print(f"❌ {_timestamp()} - Error: {str(e)}")
        return False


def main():
    """Parse command-line arguments and run the monitoring loop.

    Checks the service every ``--interval`` seconds until interrupted with
    Ctrl+C, or exactly once when ``--once`` is given. An alert is printed
    once three or more checks in a row have failed.
    """
    parser = argparse.ArgumentParser(description='Monitor Talk2Me service health')
    parser.add_argument('--url', default='http://localhost:5005', help='Service URL')
    parser.add_argument('--detailed', action='store_true', help='Show detailed health info')
    parser.add_argument('--interval', type=_positive_int, default=30,
                        help='Check interval in seconds')
    parser.add_argument('--once', action='store_true', help='Run once and exit')

    args = parser.parse_args()

    print(f"Monitoring {args.url}")
    print("Press Ctrl+C to stop\n")

    consecutive_failures = 0

    try:
        while True:
            success = check_health(args.url, args.detailed)

            if not success:
                consecutive_failures += 1
                if consecutive_failures >= 3:
                    print(f"\n⚠️ ALERT: Service has been down for {consecutive_failures} consecutive checks!")
            else:
                consecutive_failures = 0

            if args.once:
                break

            time.sleep(args.interval)

    except KeyboardInterrupt:
        print("\n\nMonitoring stopped.")


if __name__ == "__main__":
    main()