#!/bin/bash
# Setup script for the Voice Language Translator.
# Generates the Flask app (app.py), the HTML template, requirements.txt
# and README.md. Safe to re-run: files are overwritten each time.
# (Removed pasted page-metadata lines that preceded the shebang and the
# stray "|" lines, both of which broke the script.)

# Create necessary directories
mkdir -p templates static/{css,js}
# Move HTML template to templates directory
cat > templates/index.html << 'EOL'
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Voice Language Translator</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
    <style>
        body {
            padding-top: 20px;
            padding-bottom: 20px;
            background-color: #f8f9fa;
        }
        .record-btn {
            width: 80px;
            height: 80px;
            border-radius: 50%;
            display: flex;
            align-items: center;
            justify-content: center;
            font-size: 32px;
            margin: 20px auto;
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
            transition: all 0.3s;
        }
        .record-btn:active {
            transform: scale(0.95);
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }
        .recording {
            background-color: #dc3545 !important;
            animation: pulse 1.5s infinite;
        }
        @keyframes pulse {
            0% {
                transform: scale(1);
            }
            50% {
                transform: scale(1.05);
            }
            100% {
                transform: scale(1);
            }
        }
        .card {
            border-radius: 15px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            margin-bottom: 20px;
        }
        .card-header {
            border-radius: 15px 15px 0 0 !important;
        }
        .language-select {
            border-radius: 10px;
            padding: 10px;
        }
        .text-display {
            min-height: 100px;
            padding: 15px;
            background-color: #f8f9fa;
            border-radius: 10px;
            margin-bottom: 15px;
        }
        .btn-action {
            border-radius: 10px;
            padding: 8px 15px;
            margin: 5px;
        }
        .spinner-border {
            width: 1rem;
            height: 1rem;
            margin-right: 5px;
        }
        .status-indicator {
            font-size: 0.9rem;
            font-style: italic;
            color: #6c757d;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1 class="text-center mb-4">Voice Language Translator</h1>
        <p class="text-center text-muted">Powered by Gemma 3, Whisper & Edge TTS</p>

        <div class="row">
            <div class="col-md-6 mb-3">
                <div class="card">
                    <div class="card-header bg-primary text-white">
                        <h5 class="mb-0">Source</h5>
                    </div>
                    <div class="card-body">
                        <select id="sourceLanguage" class="form-select language-select mb-3">
                            {% for language in languages %}
                            <option value="{{ language }}">{{ language }}</option>
                            {% endfor %}
                        </select>
                        <div class="text-display" id="sourceText">
                            <p class="text-muted">Your transcribed text will appear here...</p>
                        </div>
                        <div class="d-flex justify-content-between">
                            <button id="playSource" class="btn btn-outline-primary btn-action" disabled>
                                <i class="fas fa-play"></i> Play
                            </button>
                            <button id="clearSource" class="btn btn-outline-secondary btn-action">
                                <i class="fas fa-trash"></i> Clear
                            </button>
                        </div>
                    </div>
                </div>
            </div>

            <div class="col-md-6 mb-3">
                <div class="card">
                    <div class="card-header bg-success text-white">
                        <h5 class="mb-0">Translation</h5>
                    </div>
                    <div class="card-body">
                        <select id="targetLanguage" class="form-select language-select mb-3">
                            {% for language in languages %}
                            <option value="{{ language }}">{{ language }}</option>
                            {% endfor %}
                        </select>
                        <div class="text-display" id="translatedText">
                            <p class="text-muted">Translation will appear here...</p>
                        </div>
                        <div class="d-flex justify-content-between">
                            <button id="playTranslation" class="btn btn-outline-success btn-action" disabled>
                                <i class="fas fa-play"></i> Play
                            </button>
                            <button id="clearTranslation" class="btn btn-outline-secondary btn-action">
                                <i class="fas fa-trash"></i> Clear
                            </button>
                        </div>
                    </div>
                </div>
            </div>
        </div>

        <div class="text-center">
            <button id="recordBtn" class="btn btn-primary record-btn">
                <i class="fas fa-microphone"></i>
            </button>
            <p class="status-indicator" id="statusIndicator">Click to start recording</p>
        </div>

        <div class="text-center mt-3">
            <button id="translateBtn" class="btn btn-success" disabled>
                <i class="fas fa-language"></i> Translate
            </button>
        </div>

        <div class="mt-3">
            <div class="progress d-none" id="progressContainer">
                <div id="progressBar" class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" style="width: 0%"></div>
            </div>
        </div>

        <audio id="audioPlayer" style="display: none;"></audio>
    </div>

    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
    <script>
    document.addEventListener('DOMContentLoaded', function() {
        // DOM elements
        const recordBtn = document.getElementById('recordBtn');
        const translateBtn = document.getElementById('translateBtn');
        const sourceText = document.getElementById('sourceText');
        const translatedText = document.getElementById('translatedText');
        const sourceLanguage = document.getElementById('sourceLanguage');
        const targetLanguage = document.getElementById('targetLanguage');
        const playSource = document.getElementById('playSource');
        const playTranslation = document.getElementById('playTranslation');
        const clearSource = document.getElementById('clearSource');
        const clearTranslation = document.getElementById('clearTranslation');
        const statusIndicator = document.getElementById('statusIndicator');
        const progressContainer = document.getElementById('progressContainer');
        const progressBar = document.getElementById('progressBar');
        const audioPlayer = document.getElementById('audioPlayer');

        // Recording state
        let isRecording = false;
        let mediaRecorder = null;
        let audioChunks = [];
        let currentSourceText = '';
        let currentTranslationText = '';

        // Render one paragraph of text inside a display box.
        // Uses textContent instead of innerHTML so transcriptions,
        // translations and server error strings cannot inject HTML.
        function renderText(container, text, className) {
            const p = document.createElement('p');
            if (className) {
                p.className = className;
            }
            p.textContent = text;
            container.replaceChildren(p);
        }

        // Make sure target language is different from source
        if (targetLanguage.options[0].value === sourceLanguage.value) {
            targetLanguage.selectedIndex = 1;
        }

        // Keep source and target languages distinct on change
        sourceLanguage.addEventListener('change', function() {
            if (targetLanguage.value === sourceLanguage.value) {
                for (let i = 0; i < targetLanguage.options.length; i++) {
                    if (targetLanguage.options[i].value !== sourceLanguage.value) {
                        targetLanguage.selectedIndex = i;
                        break;
                    }
                }
            }
        });

        targetLanguage.addEventListener('change', function() {
            if (targetLanguage.value === sourceLanguage.value) {
                for (let i = 0; i < sourceLanguage.options.length; i++) {
                    if (sourceLanguage.options[i].value !== targetLanguage.value) {
                        sourceLanguage.selectedIndex = i;
                        break;
                    }
                }
            }
        });

        // Record button toggles recording
        recordBtn.addEventListener('click', function() {
            if (isRecording) {
                stopRecording();
            } else {
                startRecording();
            }
        });

        // Start capturing microphone audio
        function startRecording() {
            navigator.mediaDevices.getUserMedia({ audio: true })
                .then(stream => {
                    mediaRecorder = new MediaRecorder(stream);
                    audioChunks = [];

                    mediaRecorder.addEventListener('dataavailable', event => {
                        audioChunks.push(event.data);
                    });

                    mediaRecorder.addEventListener('stop', () => {
                        // NOTE(review): MediaRecorder typically emits webm/ogg,
                        // not WAV; Whisper decodes via ffmpeg so this works,
                        // but the MIME label is nominal.
                        const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                        transcribeAudio(audioBlob);
                    });

                    mediaRecorder.start();
                    isRecording = true;
                    recordBtn.classList.add('recording');
                    recordBtn.classList.replace('btn-primary', 'btn-danger');
                    recordBtn.innerHTML = '<i class="fas fa-stop"></i>';
                    statusIndicator.textContent = 'Recording... Click to stop';
                })
                .catch(error => {
                    console.error('Error accessing microphone:', error);
                    alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
                });
        }

        // Stop recording and release the microphone
        function stopRecording() {
            mediaRecorder.stop();
            isRecording = false;
            recordBtn.classList.remove('recording');
            recordBtn.classList.replace('btn-danger', 'btn-primary');
            recordBtn.innerHTML = '<i class="fas fa-microphone"></i>';
            statusIndicator.textContent = 'Processing audio...';

            // Stop all audio tracks
            mediaRecorder.stream.getTracks().forEach(track => track.stop());
        }

        // Send recorded audio to the server for transcription
        function transcribeAudio(audioBlob) {
            const formData = new FormData();
            formData.append('audio', audioBlob);
            formData.append('source_lang', sourceLanguage.value);

            showProgress();

            fetch('/transcribe', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                hideProgress();

                if (data.success) {
                    currentSourceText = data.text;
                    renderText(sourceText, data.text);
                    playSource.disabled = false;
                    translateBtn.disabled = false;
                    statusIndicator.textContent = 'Transcription complete';
                } else {
                    renderText(sourceText, `Error: ${data.error}`, 'text-danger');
                    statusIndicator.textContent = 'Transcription failed';
                }
            })
            .catch(error => {
                hideProgress();
                console.error('Transcription error:', error);
                renderText(sourceText, 'Failed to transcribe audio. Please try again.', 'text-danger');
                statusIndicator.textContent = 'Transcription failed';
            });
        }

        // Translate the current source text
        translateBtn.addEventListener('click', function() {
            if (!currentSourceText) {
                return;
            }

            statusIndicator.textContent = 'Translating...';
            showProgress();

            fetch('/translate', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({
                    text: currentSourceText,
                    source_lang: sourceLanguage.value,
                    target_lang: targetLanguage.value
                })
            })
            .then(response => response.json())
            .then(data => {
                hideProgress();

                if (data.success) {
                    currentTranslationText = data.translation;
                    renderText(translatedText, data.translation);
                    playTranslation.disabled = false;
                    statusIndicator.textContent = 'Translation complete';
                } else {
                    renderText(translatedText, `Error: ${data.error}`, 'text-danger');
                    statusIndicator.textContent = 'Translation failed';
                }
            })
            .catch(error => {
                hideProgress();
                console.error('Translation error:', error);
                renderText(translatedText, 'Failed to translate. Please try again.', 'text-danger');
                statusIndicator.textContent = 'Translation failed';
            });
        });

        // Play source text
        playSource.addEventListener('click', function() {
            if (!currentSourceText) return;

            playAudio(currentSourceText, sourceLanguage.value);
            statusIndicator.textContent = 'Playing source audio...';
        });

        // Play translation
        playTranslation.addEventListener('click', function() {
            if (!currentTranslationText) return;

            playAudio(currentTranslationText, targetLanguage.value);
            statusIndicator.textContent = 'Playing translation audio...';
        });

        // Request TTS audio from the server and play it
        function playAudio(text, language) {
            showProgress();

            fetch('/speak', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({
                    text: text,
                    language: language
                })
            })
            .then(response => response.json())
            .then(data => {
                hideProgress();

                if (data.success) {
                    audioPlayer.src = data.audio_url;
                    audioPlayer.onended = function() {
                        statusIndicator.textContent = 'Ready';
                    };
                    audioPlayer.play();
                } else {
                    statusIndicator.textContent = 'TTS failed';
                    alert('Failed to play audio: ' + data.error);
                }
            })
            .catch(error => {
                hideProgress();
                console.error('TTS error:', error);
                statusIndicator.textContent = 'TTS failed';
            });
        }

        // Clear buttons reset the display boxes to their placeholders
        clearSource.addEventListener('click', function() {
            renderText(sourceText, 'Your transcribed text will appear here...', 'text-muted');
            currentSourceText = '';
            playSource.disabled = true;
            translateBtn.disabled = true;
        });

        clearTranslation.addEventListener('click', function() {
            renderText(translatedText, 'Translation will appear here...', 'text-muted');
            currentTranslationText = '';
            playTranslation.disabled = true;
        });

        // Fake-progress indicator: ticks up to 90% while a request is in
        // flight, then hideProgress() completes and resets the bar.
        function showProgress() {
            // Clear any interval left over from an overlapping request so
            // intervals cannot accumulate and fight over the bar width.
            if (progressBar.dataset.interval) {
                clearInterval(Number(progressBar.dataset.interval));
            }
            progressContainer.classList.remove('d-none');
            let progress = 0;
            const interval = setInterval(() => {
                progress += 5;
                if (progress > 90) {
                    clearInterval(interval);
                }
                progressBar.style.width = `${progress}%`;
            }, 100);
            progressBar.dataset.interval = interval;
        }

        function hideProgress() {
            const interval = progressBar.dataset.interval;
            if (interval) {
                clearInterval(Number(interval));
            }
            progressBar.style.width = '100%';
            setTimeout(() => {
                progressContainer.classList.add('d-none');
                progressBar.style.width = '0%';
            }, 500);
        }
    });
    </script>
</body>
</html>
EOL
# Create app.py
cat > app.py << 'EOL'
import os
import time
import tempfile
import requests
import json
import logging
from urllib.parse import urlsplit

from flask import Flask, render_template, request, jsonify, Response, send_file
import whisper
import torch
import ollama

app = Flask(__name__)
# Scratch directory for uploaded recordings and generated speech files.
app.config['UPLOAD_FOLDER'] = tempfile.mkdtemp()
app.config['TTS_SERVER'] = os.environ.get('TTS_SERVER_URL', 'http://localhost:5050/v1/audio/speech')
app.config['TTS_API_KEY'] = os.environ.get('TTS_API_KEY', 'your_api_key_here')

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Add a route to check TTS server status
@app.route('/check_tts_server', methods=['GET'])
def check_tts_server():
    """Probe the configured TTS server and report online/error status as JSON."""
    try:
        # Build '<scheme>://<host>/status' from the configured speech endpoint.
        # (The previous code rsplit on '/api/generate', which never occurs in
        # the default '/v1/audio/speech' URL, so the probe hit a bogus path
        # like '/v1/audio/speech/status'.)
        parts = urlsplit(app.config['TTS_SERVER'])
        status_url = f"{parts.scheme}://{parts.netloc}/status"
        response = requests.get(status_url, timeout=5)
        if response.status_code == 200:
            return jsonify({
                'status': 'online',
                'url': app.config['TTS_SERVER']
            })
        else:
            return jsonify({
                'status': 'error',
                'message': f'TTS server returned status code {response.status_code}',
                'url': app.config['TTS_SERVER']
            })
    except requests.exceptions.RequestException as e:
        return jsonify({
            'status': 'error',
            'message': f'Cannot connect to TTS server: {str(e)}',
            'url': app.config['TTS_SERVER']
        })


# Load Whisper model once at startup; all requests reuse this instance.
logger.info("Loading Whisper model...")
whisper_model = whisper.load_model("base")
logger.info("Whisper model loaded successfully")

# Supported languages (ISO code -> display name)
SUPPORTED_LANGUAGES = {
    "ar": "Arabic",
    "hy": "Armenian",
    "az": "Azerbaijani",
    "en": "English",
    "fr": "French",
    "ka": "Georgian",
    "kk": "Kazakh",
    "zh": "Mandarin",
    "fa": "Farsi",
    "pt": "Portuguese",
    "ru": "Russian",
    "es": "Spanish",
    "tr": "Turkish",
    "uz": "Uzbek"
}

# Map language names to language codes
LANGUAGE_TO_CODE = {v: k for k, v in SUPPORTED_LANGUAGES.items()}

# Map language names to OpenAI TTS voice options.
# OpenAI-style TTS exposes only generic voices, so each language is assigned
# one of them and we rely on the translated text being in the right language.
LANGUAGE_TO_VOICE = {
    "Arabic": "alloy",
    "Armenian": "echo",
    "Azerbaijani": "nova",
    "English": "echo",
    "French": "alloy",
    "Georgian": "fable",
    "Kazakh": "onyx",
    "Mandarin": "shimmer",
    "Farsi": "nova",
    "Portuguese": "alloy",
    "Russian": "echo",
    "Spanish": "nova",
    "Turkish": "fable",
    "Uzbek": "onyx"
}


@app.route('/')
def index():
    """Serve the single-page UI with the sorted language list."""
    return render_template('index.html', languages=sorted(SUPPORTED_LANGUAGES.values()))


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file with Whisper.

    Expects multipart form data with an 'audio' file and an optional
    'source_lang' display name used as a language hint.
    """
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400

    audio_file = request.files['audio']
    source_lang = request.form.get('source_lang', '')

    # Save the audio file temporarily
    temp_path = os.path.join(app.config['UPLOAD_FOLDER'], 'input_audio.wav')
    audio_file.save(temp_path)

    try:
        # Use Whisper for transcription; None lets Whisper auto-detect
        # when the display name is unknown.
        result = whisper_model.transcribe(
            temp_path,
            language=LANGUAGE_TO_CODE.get(source_lang, None)
        )
        transcribed_text = result["text"]

        return jsonify({
            'success': True,
            'text': transcribed_text
        })
    except Exception as e:
        logger.error(f"Transcription error: {str(e)}")
        return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
    finally:
        # Clean up the temporary file
        if os.path.exists(temp_path):
            os.remove(temp_path)


@app.route('/translate', methods=['POST'])
def translate():
    """Translate JSON {text, source_lang, target_lang} via Gemma 3 on Ollama."""
    try:
        data = request.json
        text = data.get('text', '')
        source_lang = data.get('source_lang', '')
        target_lang = data.get('target_lang', '')

        if not text or not source_lang or not target_lang:
            return jsonify({'error': 'Missing required parameters'}), 400

        # Create a prompt for Gemma 3 translation
        prompt = f"""
Translate the following text from {source_lang} to {target_lang}:

"{text}"

Provide only the translation without any additional text.
"""

        # Use Ollama to interact with Gemma 3
        response = ollama.chat(
            model="gemma3",
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        )

        translated_text = response['message']['content'].strip()

        return jsonify({
            'success': True,
            'translation': translated_text
        })
    except Exception as e:
        logger.error(f"Translation error: {str(e)}")
        return jsonify({'error': f'Translation failed: {str(e)}'}), 500


@app.route('/speak', methods=['POST'])
def speak():
    """Synthesize speech for JSON {text, language} via the Edge TTS server.

    On success returns {'success': True, 'audio_url': '/get_audio/<file>'}.
    """
    try:
        data = request.json
        text = data.get('text', '')
        language = data.get('language', '')

        if not text or not language:
            return jsonify({'error': 'Missing required parameters'}), 400

        voice = LANGUAGE_TO_VOICE.get(language)
        if not voice:
            return jsonify({'error': 'Unsupported language for TTS'}), 400

        # Get TTS server URL from environment or config
        tts_server_url = app.config['TTS_SERVER']

        try:
            # Request TTS from the Edge TTS server
            logger.info(f"Sending TTS request to {tts_server_url}")
            tts_response = requests.post(
                tts_server_url,
                headers={
                    # The configured TTS_API_KEY was previously never sent.
                    # NOTE(review): Bearer auth matches openai-edge-tts style
                    # servers -- confirm your server's expected scheme.
                    'Authorization': f"Bearer {app.config['TTS_API_KEY']}"
                },
                json={
                    'text': text,
                    'voice': voice,
                    'output_format': 'mp3'
                },
                timeout=10  # avoid hanging the request worker indefinitely
            )

            logger.info(f"TTS response status: {tts_response.status_code}")

            if tts_response.status_code != 200:
                error_msg = f'TTS request failed with status {tts_response.status_code}'
                logger.error(error_msg)

                # Try to get error details from response if possible
                try:
                    error_details = tts_response.json()
                    logger.error(f"Error details: {error_details}")
                except ValueError:
                    # Body was not JSON; nothing more to log.
                    pass

                return jsonify({'error': error_msg}), 500

            # The response contains the audio data directly; persist it with a
            # timestamped name so the client can fetch it via /get_audio.
            temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{int(time.time())}.mp3')
            with open(temp_audio_path, 'wb') as f:
                f.write(tts_response.content)

            return jsonify({
                'success': True,
                'audio_url': f'/get_audio/{os.path.basename(temp_audio_path)}'
            })
        except requests.exceptions.RequestException as e:
            error_msg = f'Failed to connect to TTS server: {str(e)}'
            logger.error(error_msg)
            return jsonify({'error': error_msg}), 500
    except Exception as e:
        logger.error(f"TTS error: {str(e)}")
        return jsonify({'error': f'TTS failed: {str(e)}'}), 500


@app.route('/get_audio/<filename>')
def get_audio(filename):
    """Serve a previously generated MP3 from the scratch directory."""
    try:
        # basename() blocks path traversal via a crafted filename.
        safe_name = os.path.basename(filename)
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        if not os.path.isfile(file_path):
            return jsonify({'error': 'Audio file not found'}), 404
        return send_file(file_path, mimetype='audio/mpeg')
    except Exception as e:
        logger.error(f"Audio retrieval error: {str(e)}")
        return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500


if __name__ == '__main__':
    # debug=True is for local development only; disable before deploying.
    app.run(host='0.0.0.0', port=8000, debug=True)
EOL
# Create requirements.txt
# (Removed the stray "|" paste artifacts that would have been written into
# the file and broken `pip install -r requirements.txt`.)
cat > requirements.txt << 'EOL'
flask==2.3.2
requests==2.31.0
openai-whisper==20231117
torch==2.1.0
ollama==0.1.5
EOL
# Create README.md
cat > README.md << 'EOL'
# Voice Language Translator

A mobile-friendly web application that translates spoken language between multiple languages using:
- Gemma 3 open-source LLM via Ollama for translation
- OpenAI Whisper for speech-to-text
- OpenAI Edge TTS for text-to-speech

## Supported Languages

- Arabic
- Armenian
- Azerbaijani
- English
- French
- Georgian
- Kazakh
- Mandarin
- Farsi
- Portuguese
- Russian
- Spanish
- Turkish
- Uzbek

## Setup Instructions

1. Install the required Python packages:
   ```
   pip install -r requirements.txt
   ```

2. Make sure you have Ollama installed and the Gemma 3 model loaded:
   ```
   ollama pull gemma3
   ```

3. Ensure your OpenAI Edge TTS server is running on port 5050.

4. Run the application:
   ```
   python app.py
   ```

5. Open your browser and navigate to:
   ```
   http://localhost:8000
   ```

## Usage

1. Select your source language from the dropdown menu
2. Press the microphone button and speak
3. Press the button again to stop recording
4. Wait for the transcription to complete
5. Select your target language
6. Press the "Translate" button
7. Use the play buttons to hear the original or translated text

## Technical Details

- The app uses Flask for the web server
- Audio is processed client-side using the MediaRecorder API
- Whisper for speech recognition with language hints
- Ollama provides access to the Gemma 3 model for translation
- OpenAI Edge TTS delivers natural-sounding speech output

## Mobile Support

The interface is fully responsive and designed to work well on mobile devices.
EOL
# Make the generated app executable (optional: it is normally launched
# with `python app.py` rather than directly).
chmod +x app.py

echo "Setup complete! Run the app with: python app.py"