From 8c8600d458e7c14e6544244a4e79dbc7cd5402a7 Mon Sep 17 00:00:00 2001
From: Adolfo Delorenzo
Date: Mon, 7 Apr 2025 09:18:05 -0600
Subject: [PATCH] working version

---
 app.py | 72 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/app.py b/app.py
index bc666d8..39c74a7 100644
--- a/app.py
+++ b/app.py
@@ -28,7 +28,7 @@ def root_files(filename):
         'apple-touch-icon-120x120.png',
         'apple-touch-icon-120x120-precomposed.png'
     ]
-    
+
     if filename in common_icons:
         # Map to appropriate icon in static/icons
         icon_mapping = {
@@ -38,9 +38,9 @@ def root_files(filename):
             'apple-touch-icon-120x120.png': 'apple-icon-120x120.png',
             'apple-touch-icon-120x120-precomposed.png': 'apple-icon-120x120.png'
         }
-    
+
         return send_from_directory('static/icons', icon_mapping.get(filename, 'apple-icon-180x180.png'))
-    
+
     # If not an icon, return 404
     return "File not found", 404
 
@@ -206,11 +206,11 @@ LANGUAGE_TO_CODE = {v: k for k, v in SUPPORTED_LANGUAGES.items()}
 
 # Map language names to OpenAI TTS voice options
 LANGUAGE_TO_VOICE = {
-    "Arabic": "ar-EG-ShakirNeural",  # Using OpenAI general voices
-    "Armenian": "echo",  # as OpenAI doesn't have specific voices
-    "Azerbaijani": "az-AZ-BanuNeural",  # for all these languages
-    "English": "en-GB-RyanNeural",  # We'll use the available voices
-    "French": "fr-FR-EloiseNeural",  # and rely on the translation being
+    "Arabic": "ar-EG-ShakirNeural",  # Using OpenAI general voices
+    "Armenian": "echo",  # as OpenAI doesn't have specific voices
+    "Azerbaijani": "az-AZ-BanuNeural",  # for all these languages
+    "English": "en-GB-RyanNeural",  # We'll use the available voices
+    "French": "fr-FR-DeniseNeural",  # and rely on the translation being
     "Georgian": "ka-GE-GiorgiNeural",  # in the correct language text
     "Kazakh": "kk-KZ-DauletNeural",
     "Mandarin": "zh-CN-YunjianNeural",
@@ -230,22 +230,22 @@ def index():
 def transcribe():
     if 'audio' not in request.files:
         return jsonify({'error': 'No audio file provided'}), 400
-    
+
     audio_file = request.files['audio']
     source_lang = request.form.get('source_lang', '')
-    
+
     # Save the audio file temporarily
     temp_path = os.path.join(app.config['UPLOAD_FOLDER'], 'input_audio.wav')
     audio_file.save(temp_path)
-    
+
     try:
         # Use Whisper for transcription
         result = whisper_model.transcribe(
-            temp_path, 
+            temp_path,
             language=LANGUAGE_TO_CODE.get(source_lang, None)
         )
         transcribed_text = result["text"]
-        
+
         return jsonify({
             'success': True,
             'text': transcribed_text
@@ -265,19 +265,19 @@ def translate():
     text = data.get('text', '')
     source_lang = data.get('source_lang', '')
     target_lang = data.get('target_lang', '')
-    
+
     if not text or not source_lang or not target_lang:
         return jsonify({'error': 'Missing required parameters'}), 400
-    
+
     # Create a prompt for Gemma 3 translation
     prompt = f"""
     Translate the following text from {source_lang} to {target_lang}:
-    
+
     "{text}"
-    
+
     Provide only the translation without any additional text.
     """
-    
+
     # Use Ollama to interact with Gemma 3
     response = ollama.chat(
         model="gemma3:27b",
@@ -288,9 +288,9 @@ def translate():
             }
         ]
     )
-    
+
     translated_text = response['message']['content'].strip()
-    
+
     return jsonify({
         'success': True,
         'translation': translated_text
@@ -305,29 +305,29 @@ def speak():
     data = request.json
     text = data.get('text', '')
     language = data.get('language', '')
-    
+
     if not text or not language:
         return jsonify({'error': 'Missing required parameters'}), 400
-    
+
     voice = LANGUAGE_TO_VOICE.get(language, 'echo')  # Default to echo if language not found
-    
+
     # Get TTS server URL and API key from config
     tts_server_url = app.config['TTS_SERVER']
     tts_api_key = app.config['TTS_API_KEY']
-    
+
     try:
         # Request TTS from the OpenAI Edge TTS server
         logger.info(f"Sending TTS request to {tts_server_url}")
-        
+
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {tts_api_key}"
         }
-        
+
         # Log request details for debugging
         logger.info(f"Text for TTS: {text}")
         logger.info(f"Selected voice: {voice}")
-        
+
         # Proper OpenAI TTS payload
         payload = {
             "input": text,
@@ -335,26 +335,26 @@ def speak():
             "response_format": "mp3",
             "speed": 1.0
         }
-        
+
         logger.debug(f"Full TTS request payload: {payload}")
-        
+
         # Dump the payload to ensure proper JSON formatting
         payload_json = json.dumps(payload)
         logger.debug(f"Serialized payload: {payload_json}")
-        
+
         tts_response = requests.post(
             tts_server_url,
             headers=headers,
             json=payload,  # Use json parameter to ensure proper serialization
            timeout=15  # Longer timeout for audio generation
        )
-        
+
        logger.info(f"TTS response status: {tts_response.status_code}")
-        
+
        if tts_response.status_code != 200:
            error_msg = f'TTS request failed with status {tts_response.status_code}'
            logger.error(error_msg)
-            
+
            # Try to get error details from response if possible
            try:
                error_details = tts_response.json()
@@ -364,14 +364,14 @@ def speak():
                 logger.error(f"Could not parse error response: {str(e)}")
                 # Log the raw response content
                 logger.error(f"Raw response: {tts_response.text[:200]}")
-            
+
             return jsonify({'error': error_msg}), 500
-        
+
         # The response contains the audio data directly
         temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{int(time.time())}.mp3')
         with open(temp_audio_path, 'wb') as f:
             f.write(tts_response.content)
-        
+
         return jsonify({
             'success': True,
             'audio_url': f'/get_audio/{os.path.basename(temp_audio_path)}'