quasi-final
setup-script.sh (new executable file, 776 lines)
#!/bin/bash

# Create necessary directories
mkdir -p templates static/{css,js}

# Write the HTML template to the templates directory
cat > templates/index.html << 'EOL'
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Voice Language Translator</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
    <style>
        body {
            padding-top: 20px;
            padding-bottom: 20px;
            background-color: #f8f9fa;
        }
        .record-btn {
            width: 80px;
            height: 80px;
            border-radius: 50%;
            display: flex;
            align-items: center;
            justify-content: center;
            font-size: 32px;
            margin: 20px auto;
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
            transition: all 0.3s;
        }
        .record-btn:active {
            transform: scale(0.95);
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }
        .recording {
            background-color: #dc3545 !important;
            animation: pulse 1.5s infinite;
        }
        @keyframes pulse {
            0% { transform: scale(1); }
            50% { transform: scale(1.05); }
            100% { transform: scale(1); }
        }
        .card {
            border-radius: 15px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            margin-bottom: 20px;
        }
        .card-header {
            border-radius: 15px 15px 0 0 !important;
        }
        .language-select {
            border-radius: 10px;
            padding: 10px;
        }
        .text-display {
            min-height: 100px;
            padding: 15px;
            background-color: #f8f9fa;
            border-radius: 10px;
            margin-bottom: 15px;
        }
        .btn-action {
            border-radius: 10px;
            padding: 8px 15px;
            margin: 5px;
        }
        .spinner-border {
            width: 1rem;
            height: 1rem;
            margin-right: 5px;
        }
        .status-indicator {
            font-size: 0.9rem;
            font-style: italic;
            color: #6c757d;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1 class="text-center mb-4">Voice Language Translator</h1>
        <p class="text-center text-muted">Powered by Gemma 3, Whisper & Edge TTS</p>

        <div class="row">
            <div class="col-md-6 mb-3">
                <div class="card">
                    <div class="card-header bg-primary text-white">
                        <h5 class="mb-0">Source</h5>
                    </div>
                    <div class="card-body">
                        <select id="sourceLanguage" class="form-select language-select mb-3">
                            {% for language in languages %}
                            <option value="{{ language }}">{{ language }}</option>
                            {% endfor %}
                        </select>
                        <div class="text-display" id="sourceText">
                            <p class="text-muted">Your transcribed text will appear here...</p>
                        </div>
                        <div class="d-flex justify-content-between">
                            <button id="playSource" class="btn btn-outline-primary btn-action" disabled>
                                <i class="fas fa-play"></i> Play
                            </button>
                            <button id="clearSource" class="btn btn-outline-secondary btn-action">
                                <i class="fas fa-trash"></i> Clear
                            </button>
                        </div>
                    </div>
                </div>
            </div>

            <div class="col-md-6 mb-3">
                <div class="card">
                    <div class="card-header bg-success text-white">
                        <h5 class="mb-0">Translation</h5>
                    </div>
                    <div class="card-body">
                        <select id="targetLanguage" class="form-select language-select mb-3">
                            {% for language in languages %}
                            <option value="{{ language }}">{{ language }}</option>
                            {% endfor %}
                        </select>
                        <div class="text-display" id="translatedText">
                            <p class="text-muted">Translation will appear here...</p>
                        </div>
                        <div class="d-flex justify-content-between">
                            <button id="playTranslation" class="btn btn-outline-success btn-action" disabled>
                                <i class="fas fa-play"></i> Play
                            </button>
                            <button id="clearTranslation" class="btn btn-outline-secondary btn-action">
                                <i class="fas fa-trash"></i> Clear
                            </button>
                        </div>
                    </div>
                </div>
            </div>
        </div>

        <div class="text-center">
            <button id="recordBtn" class="btn btn-primary record-btn">
                <i class="fas fa-microphone"></i>
            </button>
            <p class="status-indicator" id="statusIndicator">Click to start recording</p>
        </div>

        <div class="text-center mt-3">
            <button id="translateBtn" class="btn btn-success" disabled>
                <i class="fas fa-language"></i> Translate
            </button>
        </div>

        <div class="mt-3">
            <div class="progress d-none" id="progressContainer">
                <div id="progressBar" class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" style="width: 0%"></div>
            </div>
        </div>

        <audio id="audioPlayer" style="display: none;"></audio>
    </div>

    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // DOM elements
            const recordBtn = document.getElementById('recordBtn');
            const translateBtn = document.getElementById('translateBtn');
            const sourceText = document.getElementById('sourceText');
            const translatedText = document.getElementById('translatedText');
            const sourceLanguage = document.getElementById('sourceLanguage');
            const targetLanguage = document.getElementById('targetLanguage');
            const playSource = document.getElementById('playSource');
            const playTranslation = document.getElementById('playTranslation');
            const clearSource = document.getElementById('clearSource');
            const clearTranslation = document.getElementById('clearTranslation');
            const statusIndicator = document.getElementById('statusIndicator');
            const progressContainer = document.getElementById('progressContainer');
            const progressBar = document.getElementById('progressBar');
            const audioPlayer = document.getElementById('audioPlayer');

            // Set initial values
            let isRecording = false;
            let mediaRecorder = null;
            let audioChunks = [];
            let currentSourceText = '';
            let currentTranslationText = '';

            // Make sure target language is different from source
            if (targetLanguage.options[0].value === sourceLanguage.value) {
                targetLanguage.selectedIndex = 1;
            }

            // Event listeners for language selection
            sourceLanguage.addEventListener('change', function() {
                if (targetLanguage.value === sourceLanguage.value) {
                    for (let i = 0; i < targetLanguage.options.length; i++) {
                        if (targetLanguage.options[i].value !== sourceLanguage.value) {
                            targetLanguage.selectedIndex = i;
                            break;
                        }
                    }
                }
            });

            targetLanguage.addEventListener('change', function() {
                if (targetLanguage.value === sourceLanguage.value) {
                    for (let i = 0; i < sourceLanguage.options.length; i++) {
                        if (sourceLanguage.options[i].value !== targetLanguage.value) {
                            sourceLanguage.selectedIndex = i;
                            break;
                        }
                    }
                }
            });
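
            // The two listeners above keep the selections mutually exclusive:
            // picking the same language on one side bumps the other side to
            // the next available option.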

            // Record button click event
            recordBtn.addEventListener('click', function() {
                if (isRecording) {
                    stopRecording();
                } else {
                    startRecording();
                }
            });

            // Function to start recording
            function startRecording() {
                navigator.mediaDevices.getUserMedia({ audio: true })
                    .then(stream => {
                        mediaRecorder = new MediaRecorder(stream);
                        audioChunks = [];

                        mediaRecorder.addEventListener('dataavailable', event => {
                            audioChunks.push(event.data);
                        });

                        mediaRecorder.addEventListener('stop', () => {
                            const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                            transcribeAudio(audioBlob);
                        });
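
                        // Note: MediaRecorder typically records WebM/Opus, not
                        // true WAV; the 'audio/wav' type above is only blob
                        // metadata. Whisper decodes through ffmpeg, which sniffs
                        // the real container, so transcription still works.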

                        mediaRecorder.start();
                        isRecording = true;
                        recordBtn.classList.add('recording');
                        recordBtn.classList.replace('btn-primary', 'btn-danger');
                        recordBtn.innerHTML = '<i class="fas fa-stop"></i>';
                        statusIndicator.textContent = 'Recording... Click to stop';
                    })
                    .catch(error => {
                        console.error('Error accessing microphone:', error);
                        alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
                    });
            }

            // Function to stop recording
            function stopRecording() {
                mediaRecorder.stop();
                isRecording = false;
                recordBtn.classList.remove('recording');
                recordBtn.classList.replace('btn-danger', 'btn-primary');
                recordBtn.innerHTML = '<i class="fas fa-microphone"></i>';
                statusIndicator.textContent = 'Processing audio...';

                // Stop all audio tracks
                mediaRecorder.stream.getTracks().forEach(track => track.stop());
            }

            // Function to transcribe audio
            function transcribeAudio(audioBlob) {
                const formData = new FormData();
                formData.append('audio', audioBlob);
                formData.append('source_lang', sourceLanguage.value);

                showProgress();

                fetch('/transcribe', {
                    method: 'POST',
                    body: formData
                })
                .then(response => response.json())
                .then(data => {
                    hideProgress();

                    if (data.success) {
                        currentSourceText = data.text;
                        sourceText.innerHTML = `<p>${data.text}</p>`;
                        playSource.disabled = false;
                        translateBtn.disabled = false;
                        statusIndicator.textContent = 'Transcription complete';
                    } else {
                        sourceText.innerHTML = `<p class="text-danger">Error: ${data.error}</p>`;
                        statusIndicator.textContent = 'Transcription failed';
                    }
                })
                .catch(error => {
                    hideProgress();
                    console.error('Transcription error:', error);
                    sourceText.innerHTML = `<p class="text-danger">Failed to transcribe audio. Please try again.</p>`;
                    statusIndicator.textContent = 'Transcription failed';
                });
            }

            // Translate button click event
            translateBtn.addEventListener('click', function() {
                if (!currentSourceText) {
                    return;
                }

                statusIndicator.textContent = 'Translating...';
                showProgress();

                fetch('/translate', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json'
                    },
                    body: JSON.stringify({
                        text: currentSourceText,
                        source_lang: sourceLanguage.value,
                        target_lang: targetLanguage.value
                    })
                })
                .then(response => response.json())
                .then(data => {
                    hideProgress();

                    if (data.success) {
                        currentTranslationText = data.translation;
                        translatedText.innerHTML = `<p>${data.translation}</p>`;
                        playTranslation.disabled = false;
                        statusIndicator.textContent = 'Translation complete';
                    } else {
                        translatedText.innerHTML = `<p class="text-danger">Error: ${data.error}</p>`;
                        statusIndicator.textContent = 'Translation failed';
                    }
                })
                .catch(error => {
                    hideProgress();
                    console.error('Translation error:', error);
                    translatedText.innerHTML = `<p class="text-danger">Failed to translate. Please try again.</p>`;
                    statusIndicator.textContent = 'Translation failed';
                });
            });

            // Play source text
            playSource.addEventListener('click', function() {
                if (!currentSourceText) return;

                playAudio(currentSourceText, sourceLanguage.value);
                statusIndicator.textContent = 'Playing source audio...';
            });

            // Play translation
            playTranslation.addEventListener('click', function() {
                if (!currentTranslationText) return;

                playAudio(currentTranslationText, targetLanguage.value);
                statusIndicator.textContent = 'Playing translation audio...';
            });

            // Function to play audio via TTS
            function playAudio(text, language) {
                showProgress();

                fetch('/speak', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json'
                    },
                    body: JSON.stringify({
                        text: text,
                        language: language
                    })
                })
                .then(response => response.json())
                .then(data => {
                    hideProgress();

                    if (data.success) {
                        audioPlayer.src = data.audio_url;
                        audioPlayer.onended = function() {
                            statusIndicator.textContent = 'Ready';
                        };
                        audioPlayer.play();
                    } else {
                        statusIndicator.textContent = 'TTS failed';
                        alert('Failed to play audio: ' + data.error);
                    }
                })
                .catch(error => {
                    hideProgress();
                    console.error('TTS error:', error);
                    statusIndicator.textContent = 'TTS failed';
                });
            }

            // Clear buttons
            clearSource.addEventListener('click', function() {
                sourceText.innerHTML = '<p class="text-muted">Your transcribed text will appear here...</p>';
                currentSourceText = '';
                playSource.disabled = true;
                translateBtn.disabled = true;
            });

            clearTranslation.addEventListener('click', function() {
                translatedText.innerHTML = '<p class="text-muted">Translation will appear here...</p>';
                currentTranslationText = '';
                playTranslation.disabled = true;
            });

            // Progress indicator functions
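            // The bar is cosmetic: the server sends no real progress events,
            // so showProgress() just creeps toward 90% and parks there until
            // hideProgress() jumps it to 100% and resets.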
|
||||
function showProgress() {
|
||||
progressContainer.classList.remove('d-none');
|
||||
let progress = 0;
|
||||
const interval = setInterval(() => {
|
||||
progress += 5;
|
||||
if (progress > 90) {
|
||||
clearInterval(interval);
|
||||
}
|
||||
progressBar.style.width = `${progress}%`;
|
||||
}, 100);
|
||||
progressBar.dataset.interval = interval;
|
||||
}
|
||||
|
||||
function hideProgress() {
|
||||
const interval = progressBar.dataset.interval;
|
||||
if (interval) {
|
||||
clearInterval(Number(interval));
|
||||
}
|
||||
progressBar.style.width = '100%';
|
||||
setTimeout(() => {
|
||||
progressContainer.classList.add('d-none');
|
||||
progressBar.style.width = '0%';
|
||||
}, 500);
|
||||
}
        });
    </script>
</body>
</html>
EOL

# Create app.py
cat > app.py << 'EOL'
import os
import time
import tempfile
import requests
from flask import Flask, render_template, request, jsonify, send_file
import whisper
import ollama
import logging

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = tempfile.mkdtemp()
app.config['TTS_SERVER'] = os.environ.get('TTS_SERVER_URL', 'http://localhost:5050/v1/audio/speech')
app.config['TTS_API_KEY'] = os.environ.get('TTS_API_KEY', 'your_api_key_here')
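
# Both settings can be overridden via the environment before launch, e.g.
#   TTS_SERVER_URL=http://tts-host:5050/v1/audio/speech TTS_API_KEY=sk-... python app.py
# (note: TTS_API_KEY is read here but never attached to outgoing requests;
# add an Authorization header in speak() if your TTS server enforces auth)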

# Add a route to check TTS server status
@app.route('/check_tts_server', methods=['GET'])
def check_tts_server():
    try:
        # Probe the TTS server with a simple HTTP request
        # (strip the endpoint path to get the server root; the original
        # rsplit on '/api/generate' never matched the configured URL)
        base_url = app.config['TTS_SERVER'].rsplit('/v1/audio/speech', 1)[0]
        response = requests.get(base_url + '/status', timeout=5)
        if response.status_code == 200:
            return jsonify({
                'status': 'online',
                'url': app.config['TTS_SERVER']
            })
        else:
            return jsonify({
                'status': 'error',
                'message': f'TTS server returned status code {response.status_code}',
                'url': app.config['TTS_SERVER']
            })
    except requests.exceptions.RequestException as e:
        return jsonify({
            'status': 'error',
            'message': f'Cannot connect to TTS server: {str(e)}',
            'url': app.config['TTS_SERVER']
        })

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load Whisper model
logger.info("Loading Whisper model...")
whisper_model = whisper.load_model("base")
logger.info("Whisper model loaded successfully")

# Supported languages
SUPPORTED_LANGUAGES = {
    "ar": "Arabic",
    "hy": "Armenian",
    "az": "Azerbaijani",
    "en": "English",
    "fr": "French",
    "ka": "Georgian",
    "kk": "Kazakh",
    "zh": "Mandarin",
    "fa": "Farsi",
    "pt": "Portuguese",
    "ru": "Russian",
    "es": "Spanish",
    "tr": "Turkish",
    "uz": "Uzbek"
}

# Map language names to language codes
LANGUAGE_TO_CODE = {v: k for k, v in SUPPORTED_LANGUAGES.items()}
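# e.g. LANGUAGE_TO_CODE["English"] == "en", the hint format Whisper expects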

# Map language names to OpenAI TTS voice options. OpenAI doesn't ship
# per-language voices, so a handful of general-purpose voices are reused
# and we rely on the translated text itself being in the right language.
LANGUAGE_TO_VOICE = {
    "Arabic": "alloy",
    "Armenian": "echo",
    "Azerbaijani": "nova",
    "English": "echo",
    "French": "alloy",
    "Georgian": "fable",
    "Kazakh": "onyx",
    "Mandarin": "shimmer",
    "Farsi": "nova",
    "Portuguese": "alloy",
    "Russian": "echo",
    "Spanish": "nova",
    "Turkish": "fable",
    "Uzbek": "onyx"
}

@app.route('/')
def index():
    return render_template('index.html', languages=sorted(SUPPORTED_LANGUAGES.values()))

@app.route('/transcribe', methods=['POST'])
def transcribe():
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400

    audio_file = request.files['audio']
    source_lang = request.form.get('source_lang', '')

    # Save the audio file temporarily
    temp_path = os.path.join(app.config['UPLOAD_FOLDER'], 'input_audio.wav')
    audio_file.save(temp_path)

    try:
        # Use Whisper for transcription
        result = whisper_model.transcribe(
            temp_path,
            language=LANGUAGE_TO_CODE.get(source_lang)
        )
        transcribed_text = result["text"]

        return jsonify({
            'success': True,
            'text': transcribed_text
        })
    except Exception as e:
        logger.error(f"Transcription error: {str(e)}")
        return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
    finally:
        # Clean up the temporary file
        if os.path.exists(temp_path):
            os.remove(temp_path)

@app.route('/translate', methods=['POST'])
def translate():
    try:
        data = request.json
        text = data.get('text', '')
        source_lang = data.get('source_lang', '')
        target_lang = data.get('target_lang', '')

        if not text or not source_lang or not target_lang:
            return jsonify({'error': 'Missing required parameters'}), 400

        # Create a prompt for Gemma 3 translation
        prompt = f"""
Translate the following text from {source_lang} to {target_lang}:

"{text}"

Provide only the translation without any additional text.
"""

        # Use Ollama to interact with Gemma 3
        response = ollama.chat(
            model="gemma3",
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        )

        translated_text = response['message']['content'].strip()

        return jsonify({
            'success': True,
            'translation': translated_text
        })
    except Exception as e:
        logger.error(f"Translation error: {str(e)}")
        return jsonify({'error': f'Translation failed: {str(e)}'}), 500

@app.route('/speak', methods=['POST'])
def speak():
    try:
        data = request.json
        text = data.get('text', '')
        language = data.get('language', '')

        if not text or not language:
            return jsonify({'error': 'Missing required parameters'}), 400

        voice = LANGUAGE_TO_VOICE.get(language)
        if not voice:
            return jsonify({'error': 'Unsupported language for TTS'}), 400

        # Get TTS server URL from environment or config
        tts_server_url = app.config['TTS_SERVER']

        try:
            # Request TTS from the Edge TTS server
            logger.info(f"Sending TTS request to {tts_server_url}")
            tts_response = requests.post(
                tts_server_url,
                json={
                    'text': text,
                    'voice': voice,
                    'output_format': 'mp3'
                },
                timeout=10
            )

            logger.info(f"TTS response status: {tts_response.status_code}")

            if tts_response.status_code != 200:
                error_msg = f'TTS request failed with status {tts_response.status_code}'
                logger.error(error_msg)

                # Try to get error details from the response body, if any
                try:
                    error_details = tts_response.json()
                    logger.error(f"Error details: {error_details}")
                except ValueError:
                    pass

                return jsonify({'error': error_msg}), 500

            # The response body is the audio data itself
            temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{int(time.time())}.mp3')
            with open(temp_audio_path, 'wb') as f:
                f.write(tts_response.content)

            return jsonify({
                'success': True,
                'audio_url': f'/get_audio/{os.path.basename(temp_audio_path)}'
            })
        except requests.exceptions.RequestException as e:
            error_msg = f'Failed to connect to TTS server: {str(e)}'
            logger.error(error_msg)
            return jsonify({'error': error_msg}), 500
    except Exception as e:
        logger.error(f"TTS error: {str(e)}")
        return jsonify({'error': f'TTS failed: {str(e)}'}), 500

@app.route('/get_audio/<filename>')
def get_audio(filename):
    try:
        # basename() guards against path traversal in the user-supplied name
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], os.path.basename(filename))
        return send_file(file_path, mimetype='audio/mpeg')
    except Exception as e:
        logger.error(f"Audio retrieval error: {str(e)}")
        return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8000, debug=True)
EOL

# Create requirements.txt
cat > requirements.txt << 'EOL'
flask==2.3.2
requests==2.31.0
openai-whisper==20231117
torch==2.1.0
ollama==0.1.5
EOL
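
# Note: openai-whisper shells out to the ffmpeg binary for audio decoding, and
# ffmpeg is not pip-installable -- install it separately (e.g. apt install ffmpeg).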

# Create README.md
cat > README.md << 'EOL'
# Voice Language Translator

A mobile-friendly web application that translates spoken language between multiple languages using:
- Gemma 3 open-source LLM via Ollama for translation
- OpenAI Whisper for speech-to-text
- OpenAI Edge TTS for text-to-speech

## Supported Languages

- Arabic
- Armenian
- Azerbaijani
- English
- French
- Georgian
- Kazakh
- Mandarin
- Farsi
- Portuguese
- Russian
- Spanish
- Turkish
- Uzbek

## Setup Instructions

1. Install the required Python packages:
```
pip install -r requirements.txt
```

2. Make sure you have Ollama installed and the Gemma 3 model pulled:
```
ollama pull gemma3
```

3. Ensure your OpenAI Edge TTS server is running on port 5050.
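
A quick way to verify the TTS endpoint responds (the payload mirrors what app.py sends; adjust it if your server expects a different schema):
```
curl -X POST http://localhost:5050/v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{"text": "hello", "voice": "echo", "output_format": "mp3"}' \
  --output test.mp3
```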

4. Run the application:
```
python app.py
```

5. Open your browser and navigate to:
```
http://localhost:8000
```

## Usage

1. Select your source language from the dropdown menu
2. Press the microphone button and speak
3. Press the button again to stop recording
4. Wait for the transcription to complete
5. Select your target language
6. Press the "Translate" button
7. Use the play buttons to hear the original or translated text

## Technical Details

- The app uses Flask for the web server
- Audio is captured client-side using the MediaRecorder API
- Whisper handles speech recognition, using the selected source language as a hint
- Ollama provides access to the Gemma 3 model for translation
- OpenAI Edge TTS delivers natural-sounding speech output
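
Each stage is a plain HTTP endpoint, so the pipeline can be exercised without the browser; for example (sample text, actual output depends on the model):
```
curl -X POST http://localhost:8000/translate \
  -H "Content-Type: application/json" \
  -d '{"text": "Hello, world", "source_lang": "English", "target_lang": "Spanish"}'
```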

## Mobile Support

The interface is fully responsive and designed to work well on mobile devices.
EOL

# Make app.py executable (optional; it is launched with `python app.py`)
chmod +x app.py

echo "Setup complete! Run the app with: python app.py"