From 8c8600d458e7c14e6544244a4e79dbc7cd5402a7 Mon Sep 17 00:00:00 2001
From: Adolfo Delorenzo
Date: Mon, 7 Apr 2025 09:18:05 -0600
Subject: [PATCH] working version

---
 app.py | 72 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/app.py b/app.py
index bc666d8..39c74a7 100644
--- a/app.py
+++ b/app.py
@@ -28,7 +28,7 @@ def root_files(filename):
         'apple-touch-icon-120x120.png',
         'apple-touch-icon-120x120-precomposed.png'
     ]
-    
+
     if filename in common_icons:
         # Map to appropriate icon in static/icons
         icon_mapping = {
@@ -38,9 +38,9 @@ def root_files(filename):
             'apple-touch-icon-120x120.png': 'apple-icon-120x120.png',
             'apple-touch-icon-120x120-precomposed.png': 'apple-icon-120x120.png'
         }
-    
+
         return send_from_directory('static/icons', icon_mapping.get(filename, 'apple-icon-180x180.png'))
-    
+
     # If not an icon, return 404
     return "File not found", 404
 
@@ -206,11 +206,11 @@ LANGUAGE_TO_CODE = {v: k for k, v in SUPPORTED_LANGUAGES.items()}
 
 # Map language names to OpenAI TTS voice options
 LANGUAGE_TO_VOICE = {
-    "Arabic": "ar-EG-ShakirNeural",  # Using OpenAI general voices
-    "Armenian": "echo",  # as OpenAI doesn't have specific voices
-    "Azerbaijani": "az-AZ-BanuNeural",  # for all these languages
-    "English": "en-GB-RyanNeural",  # We'll use the available voices
-    "French": "fr-FR-EloiseNeural",  # and rely on the translation being
+    "Arabic": "ar-EG-ShakirNeural",  # Using OpenAI general voices
+    "Armenian": "echo",  # as OpenAI doesn't have specific voices
+    "Azerbaijani": "az-AZ-BanuNeural",  # for all these languages
+    "English": "en-GB-RyanNeural",  # We'll use the available voices
+    "French": "fr-FR-DeniseNeural",  # and rely on the translation being
     "Georgian": "ka-GE-GiorgiNeural",  # in the correct language text
     "Kazakh": "kk-KZ-DauletNeural",
     "Mandarin": "zh-CN-YunjianNeural",
@@ -230,22 +230,22 @@ def index():
 def transcribe():
     if 'audio' not in request.files:
         return jsonify({'error': 'No audio file provided'}), 400
-    
+
     audio_file = request.files['audio']
     source_lang = request.form.get('source_lang', '')
-    
+
     # Save the audio file temporarily
     temp_path = os.path.join(app.config['UPLOAD_FOLDER'], 'input_audio.wav')
     audio_file.save(temp_path)
-    
+
     try:
         # Use Whisper for transcription
         result = whisper_model.transcribe(
-            temp_path, 
+            temp_path,
             language=LANGUAGE_TO_CODE.get(source_lang, None)
         )
         transcribed_text = result["text"]
-        
+
         return jsonify({
             'success': True,
             'text': transcribed_text
@@ -265,19 +265,19 @@ def translate():
     text = data.get('text', '')
     source_lang = data.get('source_lang', '')
     target_lang = data.get('target_lang', '')
-    
+
     if not text or not source_lang or not target_lang:
         return jsonify({'error': 'Missing required parameters'}), 400
-    
+
     # Create a prompt for Gemma 3 translation
     prompt = f"""
     Translate the following text from {source_lang} to {target_lang}:
-    
+
     "{text}"
-    
+
     Provide only the translation without any additional text.
     """
-    
+
     # Use Ollama to interact with Gemma 3
     response = ollama.chat(
         model="gemma3:27b",
@@ -288,9 +288,9 @@ def translate():
             }
         ]
     )
-    
+
     translated_text = response['message']['content'].strip()
-    
+
     return jsonify({
         'success': True,
         'translation': translated_text
@@ -305,29 +305,29 @@ def speak():
     data = request.json
     text = data.get('text', '')
     language = data.get('language', '')
-    
+
     if not text or not language:
         return jsonify({'error': 'Missing required parameters'}), 400
-    
+
     voice = LANGUAGE_TO_VOICE.get(language, 'echo')  # Default to echo if language not found
-    
+
     # Get TTS server URL and API key from config
     tts_server_url = app.config['TTS_SERVER']
     tts_api_key = app.config['TTS_API_KEY']
-    
+
     try:
         # Request TTS from the OpenAI Edge TTS server
         logger.info(f"Sending TTS request to {tts_server_url}")
-        
+
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {tts_api_key}"
         }
-        
+
         # Log request details for debugging
         logger.info(f"Text for TTS: {text}")
         logger.info(f"Selected voice: {voice}")
-        
+
         # Proper OpenAI TTS payload
         payload = {
             "input": text,
@@ -335,26 +335,26 @@ def speak():
             "response_format": "mp3",
             "speed": 1.0
         }
-        
+
         logger.debug(f"Full TTS request payload: {payload}")
-        
+
         # Dump the payload to ensure proper JSON formatting
         payload_json = json.dumps(payload)
         logger.debug(f"Serialized payload: {payload_json}")
-        
+
         tts_response = requests.post(
             tts_server_url,
             headers=headers,
             json=payload,  # Use json parameter to ensure proper serialization
            timeout=15  # Longer timeout for audio generation
        )
-        
+
        logger.info(f"TTS response status: {tts_response.status_code}")
-        
+
        if tts_response.status_code != 200:
            error_msg = f'TTS request failed with status {tts_response.status_code}'
            logger.error(error_msg)
-            
+
            # Try to get error details from response if possible
            try:
                error_details = tts_response.json()
@@ -364,14 +364,14 @@ def speak():
                 logger.error(f"Could not parse error response: {str(e)}")
                 # Log the raw response content
                 logger.error(f"Raw response: {tts_response.text[:200]}")
-            
+
             return jsonify({'error': error_msg}), 500
-        
+
         # The response contains the audio data directly
         temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{int(time.time())}.mp3')
         with open(temp_audio_path, 'wb') as f:
             f.write(tts_response.content)
-        
+
         return jsonify({
             'success': True,
             'audio_url': f'/get_audio/{os.path.basename(temp_audio_path)}'