working version
This commit is contained in:
parent
1043402516
commit
8c8600d458
72
app.py
72
app.py
@ -28,7 +28,7 @@ def root_files(filename):
|
|||||||
'apple-touch-icon-120x120.png',
|
'apple-touch-icon-120x120.png',
|
||||||
'apple-touch-icon-120x120-precomposed.png'
|
'apple-touch-icon-120x120-precomposed.png'
|
||||||
]
|
]
|
||||||
|
|
||||||
if filename in common_icons:
|
if filename in common_icons:
|
||||||
# Map to appropriate icon in static/icons
|
# Map to appropriate icon in static/icons
|
||||||
icon_mapping = {
|
icon_mapping = {
|
||||||
@ -38,9 +38,9 @@ def root_files(filename):
|
|||||||
'apple-touch-icon-120x120.png': 'apple-icon-120x120.png',
|
'apple-touch-icon-120x120.png': 'apple-icon-120x120.png',
|
||||||
'apple-touch-icon-120x120-precomposed.png': 'apple-icon-120x120.png'
|
'apple-touch-icon-120x120-precomposed.png': 'apple-icon-120x120.png'
|
||||||
}
|
}
|
||||||
|
|
||||||
return send_from_directory('static/icons', icon_mapping.get(filename, 'apple-icon-180x180.png'))
|
return send_from_directory('static/icons', icon_mapping.get(filename, 'apple-icon-180x180.png'))
|
||||||
|
|
||||||
# If not an icon, return 404
|
# If not an icon, return 404
|
||||||
return "File not found", 404
|
return "File not found", 404
|
||||||
|
|
||||||
@ -206,11 +206,11 @@ LANGUAGE_TO_CODE = {v: k for k, v in SUPPORTED_LANGUAGES.items()}
|
|||||||
|
|
||||||
# Map language names to OpenAI TTS voice options
|
# Map language names to OpenAI TTS voice options
|
||||||
LANGUAGE_TO_VOICE = {
|
LANGUAGE_TO_VOICE = {
|
||||||
"Arabic": "ar-EG-ShakirNeural", # Using OpenAI general voices
|
"Arabic": "ar-EG-ShakirNeural", # Using OpenAI general voices
|
||||||
"Armenian": "echo", # as OpenAI doesn't have specific voices
|
"Armenian": "echo", # as OpenAI doesn't have specific voices
|
||||||
"Azerbaijani": "az-AZ-BanuNeural", # for all these languages
|
"Azerbaijani": "az-AZ-BanuNeural", # for all these languages
|
||||||
"English": "en-GB-RyanNeural", # We'll use the available voices
|
"English": "en-GB-RyanNeural", # We'll use the available voices
|
||||||
"French": "fr-FR-EloiseNeural", # and rely on the translation being
|
"French": "fr-FR-DeniseNeural", # and rely on the translation being
|
||||||
"Georgian": "ka-GE-GiorgiNeural", # in the correct language text
|
"Georgian": "ka-GE-GiorgiNeural", # in the correct language text
|
||||||
"Kazakh": "kk-KZ-DauletNeural",
|
"Kazakh": "kk-KZ-DauletNeural",
|
||||||
"Mandarin": "zh-CN-YunjianNeural",
|
"Mandarin": "zh-CN-YunjianNeural",
|
||||||
@ -230,22 +230,22 @@ def index():
|
|||||||
def transcribe():
|
def transcribe():
|
||||||
if 'audio' not in request.files:
|
if 'audio' not in request.files:
|
||||||
return jsonify({'error': 'No audio file provided'}), 400
|
return jsonify({'error': 'No audio file provided'}), 400
|
||||||
|
|
||||||
audio_file = request.files['audio']
|
audio_file = request.files['audio']
|
||||||
source_lang = request.form.get('source_lang', '')
|
source_lang = request.form.get('source_lang', '')
|
||||||
|
|
||||||
# Save the audio file temporarily
|
# Save the audio file temporarily
|
||||||
temp_path = os.path.join(app.config['UPLOAD_FOLDER'], 'input_audio.wav')
|
temp_path = os.path.join(app.config['UPLOAD_FOLDER'], 'input_audio.wav')
|
||||||
audio_file.save(temp_path)
|
audio_file.save(temp_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Use Whisper for transcription
|
# Use Whisper for transcription
|
||||||
result = whisper_model.transcribe(
|
result = whisper_model.transcribe(
|
||||||
temp_path,
|
temp_path,
|
||||||
language=LANGUAGE_TO_CODE.get(source_lang, None)
|
language=LANGUAGE_TO_CODE.get(source_lang, None)
|
||||||
)
|
)
|
||||||
transcribed_text = result["text"]
|
transcribed_text = result["text"]
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': True,
|
'success': True,
|
||||||
'text': transcribed_text
|
'text': transcribed_text
|
||||||
@ -265,19 +265,19 @@ def translate():
|
|||||||
text = data.get('text', '')
|
text = data.get('text', '')
|
||||||
source_lang = data.get('source_lang', '')
|
source_lang = data.get('source_lang', '')
|
||||||
target_lang = data.get('target_lang', '')
|
target_lang = data.get('target_lang', '')
|
||||||
|
|
||||||
if not text or not source_lang or not target_lang:
|
if not text or not source_lang or not target_lang:
|
||||||
return jsonify({'error': 'Missing required parameters'}), 400
|
return jsonify({'error': 'Missing required parameters'}), 400
|
||||||
|
|
||||||
# Create a prompt for Gemma 3 translation
|
# Create a prompt for Gemma 3 translation
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
Translate the following text from {source_lang} to {target_lang}:
|
Translate the following text from {source_lang} to {target_lang}:
|
||||||
|
|
||||||
"{text}"
|
"{text}"
|
||||||
|
|
||||||
Provide only the translation without any additional text.
|
Provide only the translation without any additional text.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Use Ollama to interact with Gemma 3
|
# Use Ollama to interact with Gemma 3
|
||||||
response = ollama.chat(
|
response = ollama.chat(
|
||||||
model="gemma3:27b",
|
model="gemma3:27b",
|
||||||
@ -288,9 +288,9 @@ def translate():
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
translated_text = response['message']['content'].strip()
|
translated_text = response['message']['content'].strip()
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': True,
|
'success': True,
|
||||||
'translation': translated_text
|
'translation': translated_text
|
||||||
@ -305,29 +305,29 @@ def speak():
|
|||||||
data = request.json
|
data = request.json
|
||||||
text = data.get('text', '')
|
text = data.get('text', '')
|
||||||
language = data.get('language', '')
|
language = data.get('language', '')
|
||||||
|
|
||||||
if not text or not language:
|
if not text or not language:
|
||||||
return jsonify({'error': 'Missing required parameters'}), 400
|
return jsonify({'error': 'Missing required parameters'}), 400
|
||||||
|
|
||||||
voice = LANGUAGE_TO_VOICE.get(language, 'echo') # Default to echo if language not found
|
voice = LANGUAGE_TO_VOICE.get(language, 'echo') # Default to echo if language not found
|
||||||
|
|
||||||
# Get TTS server URL and API key from config
|
# Get TTS server URL and API key from config
|
||||||
tts_server_url = app.config['TTS_SERVER']
|
tts_server_url = app.config['TTS_SERVER']
|
||||||
tts_api_key = app.config['TTS_API_KEY']
|
tts_api_key = app.config['TTS_API_KEY']
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Request TTS from the OpenAI Edge TTS server
|
# Request TTS from the OpenAI Edge TTS server
|
||||||
logger.info(f"Sending TTS request to {tts_server_url}")
|
logger.info(f"Sending TTS request to {tts_server_url}")
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {tts_api_key}"
|
"Authorization": f"Bearer {tts_api_key}"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Log request details for debugging
|
# Log request details for debugging
|
||||||
logger.info(f"Text for TTS: {text}")
|
logger.info(f"Text for TTS: {text}")
|
||||||
logger.info(f"Selected voice: {voice}")
|
logger.info(f"Selected voice: {voice}")
|
||||||
|
|
||||||
# Proper OpenAI TTS payload
|
# Proper OpenAI TTS payload
|
||||||
payload = {
|
payload = {
|
||||||
"input": text,
|
"input": text,
|
||||||
@ -335,26 +335,26 @@ def speak():
|
|||||||
"response_format": "mp3",
|
"response_format": "mp3",
|
||||||
"speed": 1.0
|
"speed": 1.0
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug(f"Full TTS request payload: {payload}")
|
logger.debug(f"Full TTS request payload: {payload}")
|
||||||
|
|
||||||
# Dump the payload to ensure proper JSON formatting
|
# Dump the payload to ensure proper JSON formatting
|
||||||
payload_json = json.dumps(payload)
|
payload_json = json.dumps(payload)
|
||||||
logger.debug(f"Serialized payload: {payload_json}")
|
logger.debug(f"Serialized payload: {payload_json}")
|
||||||
|
|
||||||
tts_response = requests.post(
|
tts_response = requests.post(
|
||||||
tts_server_url,
|
tts_server_url,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=payload, # Use json parameter to ensure proper serialization
|
json=payload, # Use json parameter to ensure proper serialization
|
||||||
timeout=15 # Longer timeout for audio generation
|
timeout=15 # Longer timeout for audio generation
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"TTS response status: {tts_response.status_code}")
|
logger.info(f"TTS response status: {tts_response.status_code}")
|
||||||
|
|
||||||
if tts_response.status_code != 200:
|
if tts_response.status_code != 200:
|
||||||
error_msg = f'TTS request failed with status {tts_response.status_code}'
|
error_msg = f'TTS request failed with status {tts_response.status_code}'
|
||||||
logger.error(error_msg)
|
logger.error(error_msg)
|
||||||
|
|
||||||
# Try to get error details from response if possible
|
# Try to get error details from response if possible
|
||||||
try:
|
try:
|
||||||
error_details = tts_response.json()
|
error_details = tts_response.json()
|
||||||
@ -364,14 +364,14 @@ def speak():
|
|||||||
logger.error(f"Could not parse error response: {str(e)}")
|
logger.error(f"Could not parse error response: {str(e)}")
|
||||||
# Log the raw response content
|
# Log the raw response content
|
||||||
logger.error(f"Raw response: {tts_response.text[:200]}")
|
logger.error(f"Raw response: {tts_response.text[:200]}")
|
||||||
|
|
||||||
return jsonify({'error': error_msg}), 500
|
return jsonify({'error': error_msg}), 500
|
||||||
|
|
||||||
# The response contains the audio data directly
|
# The response contains the audio data directly
|
||||||
temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{int(time.time())}.mp3')
|
temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{int(time.time())}.mp3')
|
||||||
with open(temp_audio_path, 'wb') as f:
|
with open(temp_audio_path, 'wb') as f:
|
||||||
f.write(tts_response.content)
|
f.write(tts_response.content)
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': True,
|
'success': True,
|
||||||
'audio_url': f'/get_audio/{os.path.basename(temp_audio_path)}'
|
'audio_url': f'/get_audio/{os.path.basename(temp_audio_path)}'
|
||||||
|
Loading…
Reference in New Issue
Block a user