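working version: strip trailing whitespace, align LANGUAGE_TO_VOICE comments, switch French TTS voice to fr-FR-DeniseNeural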

2025-04-07 09:18:05 -06:00
parent 1043402516
commit 8c8600d458
1 changed files with 36 additions and 36 deletions
--- a/app.py
+++ b/app.py
@@ -28,7 +28,7 @@ def root_files(filename):
        'apple-touch-icon-120x120.png',
        'apple-touch-icon-120x120-precomposed.png'
    ]
-    
+
    if filename in common_icons:
        # Map to appropriate icon in static/icons
        icon_mapping = {
@@ -38,9 +38,9 @@ def root_files(filename):
            'apple-touch-icon-120x120.png': 'apple-icon-120x120.png',
            'apple-touch-icon-120x120-precomposed.png': 'apple-icon-120x120.png'
        }
-        
+
        return send_from_directory('static/icons', icon_mapping.get(filename, 'apple-icon-180x180.png'))
-    
+
    # If not an icon, return 404
    return "File not found", 404
@@ -206,11 +206,11 @@ LANGUAGE_TO_CODE = {v: k for k, v in SUPPORTED_LANGUAGES.items()}
 # Map language names to OpenAI TTS voice options
 LANGUAGE_TO_VOICE = {
-    "Arabic": "ar-EG-ShakirNeural",      # Using OpenAI general voices 
+    "Arabic": "ar-EG-ShakirNeural",      # Using OpenAI general voices
-    "Armenian": "echo",     # as OpenAI doesn't have specific voices
+    "Armenian": "echo",                  # as OpenAI doesn't have specific voices
-    "Azerbaijani": "az-AZ-BanuNeural",  # for all these languages
+    "Azerbaijani": "az-AZ-BanuNeural",   # for all these languages
-    "English": "en-GB-RyanNeural",      # We'll use the available voices
+    "English": "en-GB-RyanNeural",       # We'll use the available voices
-    "French": "fr-FR-EloiseNeural",      # and rely on the translation being
+    "French": "fr-FR-DeniseNeural",      # and rely on the translation being
    "Georgian": "ka-GE-GiorgiNeural",    # in the correct language text
    "Kazakh": "kk-KZ-DauletNeural",
    "Mandarin": "zh-CN-YunjianNeural",
@@ -230,22 +230,22 @@ def index():
 def transcribe():
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400
-    
+
    audio_file = request.files['audio']
    source_lang = request.form.get('source_lang', '')
-    
+
    # Save the audio file temporarily
    temp_path = os.path.join(app.config['UPLOAD_FOLDER'], 'input_audio.wav')
    audio_file.save(temp_path)
-    
+
    try:
        # Use Whisper for transcription
        result = whisper_model.transcribe(
-            temp_path, 
+            temp_path,
            language=LANGUAGE_TO_CODE.get(source_lang, None)
        )
        transcribed_text = result["text"]
-        
+
        return jsonify({
            'success': True,
            'text': transcribed_text
@@ -265,19 +265,19 @@ def translate():
        text = data.get('text', '')
        source_lang = data.get('source_lang', '')
        target_lang = data.get('target_lang', '')
-        
+
        if not text or not source_lang or not target_lang:
            return jsonify({'error': 'Missing required parameters'}), 400
-        
+
        # Create a prompt for Gemma 3 translation
        prompt = f"""
        Translate the following text from {source_lang} to {target_lang}:
-        
+
        "{text}"
-        
+
        Provide only the translation without any additional text.
        """
-        
+
        # Use Ollama to interact with Gemma 3
        response = ollama.chat(
            model="gemma3:27b",
@@ -288,9 +288,9 @@ def translate():
                }
            ]
        )
-        
+
        translated_text = response['message']['content'].strip()
-        
+
        return jsonify({
            'success': True,
            'translation': translated_text
@@ -305,29 +305,29 @@ def speak():
        data = request.json
        text = data.get('text', '')
        language = data.get('language', '')
-        
+
        if not text or not language:
            return jsonify({'error': 'Missing required parameters'}), 400
-        
+
        voice = LANGUAGE_TO_VOICE.get(language, 'echo')  # Default to echo if language not found
-        
+
        # Get TTS server URL and API key from config
        tts_server_url = app.config['TTS_SERVER']
        tts_api_key = app.config['TTS_API_KEY']
-        
+
        try:
            # Request TTS from the OpenAI Edge TTS server
            logger.info(f"Sending TTS request to {tts_server_url}")
-            
+
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {tts_api_key}"
            }
-            
+
            # Log request details for debugging
            logger.info(f"Text for TTS: {text}")
            logger.info(f"Selected voice: {voice}")
-            
+
            # Proper OpenAI TTS payload
            payload = {
                "input": text,
@@ -335,26 +335,26 @@ def speak():
                "response_format": "mp3",
                "speed": 1.0
            }
-            
+
            logger.debug(f"Full TTS request payload: {payload}")
-            
+
            # Dump the payload to ensure proper JSON formatting
            payload_json = json.dumps(payload)
            logger.debug(f"Serialized payload: {payload_json}")
-            
+
            tts_response = requests.post(
                tts_server_url,
                headers=headers,
                json=payload,  # Use json parameter to ensure proper serialization
                timeout=15  # Longer timeout for audio generation
            )
-            
+
            logger.info(f"TTS response status: {tts_response.status_code}")
-            
+
            if tts_response.status_code != 200:
                error_msg = f'TTS request failed with status {tts_response.status_code}'
                logger.error(error_msg)
-                
+
                # Try to get error details from response if possible
                try:
                    error_details = tts_response.json()
@@ -364,14 +364,14 @@ def speak():
                    logger.error(f"Could not parse error response: {str(e)}")
                    # Log the raw response content 
                    logger.error(f"Raw response: {tts_response.text[:200]}")
-                    
+
                return jsonify({'error': error_msg}), 500
-            
+
            # The response contains the audio data directly
            temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{int(time.time())}.mp3')
            with open(temp_audio_path, 'wb') as f:
                f.write(tts_response.content)
-            
+
            return jsonify({
                'success': True,
                'audio_url': f'/get_audio/{os.path.basename(temp_audio_path)}'