Add real-time language interpreter feature

This commit adds a real-time interpreter mode to the Voice Language Translator:
- Implements continuous speech-to-translation capabilities using WebSockets
- Adds background processing threads for audio, transcription, and TTS
- Adds client-side speech detection and streaming
- Updates UI to include real-time interpreter controls
- Adds necessary dependencies (flask-socketio, eventlet)
This commit is contained in:
adelorenzo
2025-04-12 00:39:16 -06:00
parent 5a7f5f04ad
commit d46df7939a
4 changed files with 666 additions and 384 deletions

View File

@@ -11,6 +11,8 @@
<link rel="apple-touch-icon" sizes="152x152" href="/static/icons/apple-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/static/icons/apple-icon-180x180.png">
<link rel="apple-touch-icon" sizes="167x167" href="/static/icons/apple-icon-167x167.png">
<!-- Add Socket.IO client library -->
<script src="https://cdn.socket.io/4.6.0/socket.io.min.js"></script>
<style>
body {
padding-top: 20px;
@@ -74,6 +76,8 @@
background-color: #f8f9fa;
border-radius: 10px;
margin-bottom: 15px;
overflow-y: auto;
max-height: 300px;
}
.btn-action {
border-radius: 10px;
@@ -191,290 +195,48 @@
</button>
</div>
<!-- Add real-time interpreter button -->
<div class="text-center mt-3">
<button id="realtimeBtn" class="btn btn-primary">
<i class="fas fa-language"></i> Start Interpreter
</button>
<p class="status-indicator" id="realtimeStatusIndicator">Interpreter ready</p>
</div>
<div class="mt-3">
<div class="progress d-none" id="progressContainer">
<div id="progressBar" class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" style="width: 0%"></div>
</div>
</div>
<!-- Add explanation of real-time interpreter mode -->
<div class="row mt-4">
<div class="col-md-12">
<div class="card">
<div class="card-header bg-info text-white">
<h5 class="mb-0">Real-time Interpretation Mode</h5>
</div>
<div class="card-body">
<p>This mode enables continuous, real-time interpretation as you speak:</p>
<ul>
<li><strong>Start Interpreter</strong> - Begin continuous speech interpretation</li>
<li><strong>Stop Interpreter</strong> - End the real-time interpretation session</li>
</ul>
<p class="text-muted">
<small>
The interpreter will automatically detect speech, transcribe, translate, and speak the translation with minimal delay.
Pause detection automatically handles natural breaks in conversation.
</small>
</p>
</div>
</div>
</div>
</div>
<audio id="audioPlayer" style="display: none;"></audio>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
// Page controls, looked up once by element id.
const byId = id => document.getElementById(id);

const recordBtn = byId('recordBtn');
const translateBtn = byId('translateBtn');
const sourceText = byId('sourceText');
const translatedText = byId('translatedText');
const sourceLanguage = byId('sourceLanguage');
const targetLanguage = byId('targetLanguage');
const playSource = byId('playSource');
const playTranslation = byId('playTranslation');
const clearSource = byId('clearSource');
const clearTranslation = byId('clearTranslation');
const statusIndicator = byId('statusIndicator');
const progressContainer = byId('progressContainer');
const progressBar = byId('progressBar');
const audioPlayer = byId('audioPlayer');

// Mutable state: recording status, the active recorder and its buffered
// chunks, and the most recent transcription / translation strings.
let isRecording = false;
let mediaRecorder = null;
let audioChunks = [];
let currentSourceText = '';
let currentTranslationText = '';
// Keep the source and target language selections different from each other.

/**
 * Select the first option of `select` whose value differs from `takenValue`.
 * The current selection is left untouched when no such option exists.
 *
 * @param {HTMLSelectElement} select - Dropdown whose selection is adjusted.
 * @param {string} takenValue - Value that must not remain selected.
 */
function selectFirstOptionOtherThan(select, takenValue) {
    for (let i = 0; i < select.options.length; i++) {
        if (select.options[i].value !== takenValue) {
            select.selectedIndex = i;
            return;
        }
    }
}

// Initial state: compare the values that are actually selected (not
// options[0]) and pick a real alternative instead of a hard-coded index.
if (targetLanguage.value === sourceLanguage.value) {
    selectFirstOptionOtherThan(targetLanguage, sourceLanguage.value);
}

// Changing the source to the current target moves the target away.
sourceLanguage.addEventListener('change', function() {
    if (targetLanguage.value === sourceLanguage.value) {
        selectFirstOptionOtherThan(targetLanguage, sourceLanguage.value);
    }
});

// Changing the target to the current source moves the source away.
targetLanguage.addEventListener('change', function() {
    if (targetLanguage.value === sourceLanguage.value) {
        selectFirstOptionOtherThan(sourceLanguage, targetLanguage.value);
    }
});
// The record button toggles: a click stops an active recording, otherwise starts one.
recordBtn.addEventListener('click', () => {
    const toggle = isRecording ? stopRecording : startRecording;
    toggle();
});
// Function to start recording
/**
 * Request microphone access and start capturing audio.
 *
 * On success the recorder is stored in `mediaRecorder`, `isRecording` is set
 * and the record button switches to its "stop" appearance. When the recorder
 * later stops, the buffered chunks are combined into a Blob and passed to
 * `transcribeAudio`. Any failure is logged and reported with an alert.
 */
function startRecording() {
    const reportMicrophoneError = error => {
        console.error('Error accessing microphone:', error);
        alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
    };

    // `navigator.mediaDevices` is undefined in insecure contexts and older
    // browsers; report that instead of throwing a TypeError from the handler.
    if (!navigator.mediaDevices || typeof navigator.mediaDevices.getUserMedia !== 'function') {
        reportMicrophoneError(new Error('navigator.mediaDevices.getUserMedia is not available'));
        return;
    }

    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            let recorder;
            try {
                recorder = new MediaRecorder(stream);
                audioChunks = [];
                recorder.addEventListener('dataavailable', event => {
                    audioChunks.push(event.data);
                });
                recorder.addEventListener('stop', () => {
                    // NOTE(review): the chunks are labelled 'audio/wav' regardless of
                    // the container the recorder produced (see recorder.mimeType).
                    // Kept as-is because the /transcribe endpoint is not visible
                    // here; confirm what the server expects.
                    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                    transcribeAudio(audioBlob);
                });
                recorder.start();
            } catch (error) {
                // The microphone was already granted: release it so it does not
                // stay live when the recorder cannot be created or started.
                stream.getTracks().forEach(track => track.stop());
                throw error;
            }

            mediaRecorder = recorder;
            isRecording = true;
            recordBtn.classList.add('recording');
            recordBtn.classList.replace('btn-primary', 'btn-danger');
            recordBtn.innerHTML = '<i class="fas fa-stop"></i>';
            statusIndicator.textContent = 'Recording... Click to stop';
        })
        .catch(reportMicrophoneError);
}
// Function to stop recording
/**
 * Stop the active recorder, restore the record button to its idle
 * appearance and stop every track of the recorder's stream.
 */
function stopRecording() {
    const recorder = mediaRecorder;
    recorder.stop();
    isRecording = false;

    const buttonClasses = recordBtn.classList;
    buttonClasses.remove('recording');
    buttonClasses.replace('btn-danger', 'btn-primary');
    recordBtn.innerHTML = '<i class="fas fa-microphone"></i>';
    statusIndicator.textContent = 'Processing audio...';

    // Stop all audio tracks of the captured stream
    for (const track of recorder.stream.getTracks()) {
        track.stop();
    }
}
// Function to transcribe audio
/**
 * Upload a recording to `/transcribe` and show the outcome in the source panel.
 *
 * On success the transcription is stored in `currentSourceText` and the
 * play/translate buttons are enabled; on failure an error message is shown.
 *
 * @param {Blob} audioBlob - Recorded audio, sent as the `audio` form field.
 */
function transcribeAudio(audioBlob) {
    // Server-provided strings are inserted as text nodes so that any markup
    // they contain is displayed literally instead of being parsed as HTML.
    const showSourceMessage = (message, className) => {
        const paragraph = document.createElement('p');
        if (className) {
            paragraph.className = className;
        }
        paragraph.textContent = message;
        sourceText.textContent = '';
        sourceText.appendChild(paragraph);
    };

    const formData = new FormData();
    formData.append('audio', audioBlob);
    formData.append('source_lang', sourceLanguage.value);

    showProgress();
    fetch('/transcribe', {
        method: 'POST',
        body: formData
    })
    .then(response => response.json())
    .then(data => {
        hideProgress();
        if (data.success) {
            currentSourceText = data.text;
            showSourceMessage(data.text);
            playSource.disabled = false;
            translateBtn.disabled = false;
            statusIndicator.textContent = 'Transcription complete';
        } else {
            showSourceMessage(`Error: ${data.error}`, 'text-danger');
            statusIndicator.textContent = 'Transcription failed';
        }
    })
    .catch(error => {
        hideProgress();
        console.error('Transcription error:', error);
        showSourceMessage('Failed to transcribe audio. Please try again.', 'text-danger');
        statusIndicator.textContent = 'Transcription failed';
    });
}
// Translate button click event: send the current transcription to
// `/translate` and show the result (or an error) in the translation panel.
translateBtn.addEventListener('click', function() {
    if (!currentSourceText) {
        return;
    }

    // Server-provided strings are inserted as text nodes so that any markup
    // they contain is displayed literally instead of being parsed as HTML.
    const showTranslationMessage = (message, className) => {
        const paragraph = document.createElement('p');
        if (className) {
            paragraph.className = className;
        }
        paragraph.textContent = message;
        translatedText.textContent = '';
        translatedText.appendChild(paragraph);
    };

    statusIndicator.textContent = 'Translating...';
    showProgress();
    fetch('/translate', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            text: currentSourceText,
            source_lang: sourceLanguage.value,
            target_lang: targetLanguage.value
        })
    })
    .then(response => response.json())
    .then(data => {
        hideProgress();
        if (data.success) {
            currentTranslationText = data.translation;
            showTranslationMessage(data.translation);
            playTranslation.disabled = false;
            statusIndicator.textContent = 'Translation complete';
        } else {
            showTranslationMessage(`Error: ${data.error}`, 'text-danger');
            statusIndicator.textContent = 'Translation failed';
        }
    })
    .catch(error => {
        hideProgress();
        console.error('Translation error:', error);
        showTranslationMessage('Failed to translate. Please try again.', 'text-danger');
        statusIndicator.textContent = 'Translation failed';
    });
});
// Speak the transcription in the source language (no-op while it is empty)
playSource.addEventListener('click', () => {
    if (currentSourceText) {
        playAudio(currentSourceText, sourceLanguage.value);
        statusIndicator.textContent = 'Playing source audio...';
    }
});
// Speak the translation in the target language (no-op while it is empty)
playTranslation.addEventListener('click', () => {
    if (currentTranslationText) {
        playAudio(currentTranslationText, targetLanguage.value);
        statusIndicator.textContent = 'Playing translation audio...';
    }
});
// Function to play audio via TTS
/**
 * Request speech for `text` from `/speak` and play the returned audio URL
 * through the hidden audio element.
 *
 * @param {string} text - Text to be spoken.
 * @param {string} language - Language value sent alongside the text.
 */
function playAudio(text, language) {
    showProgress();
    fetch('/speak', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            text: text,
            language: language
        })
    })
    .then(response => response.json())
    .then(data => {
        hideProgress();
        if (!data.success) {
            statusIndicator.textContent = 'TTS failed';
            alert('Failed to play audio: ' + data.error);
            return;
        }

        audioPlayer.src = data.audio_url;
        audioPlayer.onended = function() {
            statusIndicator.textContent = 'Ready';
        };

        // play() returns a promise in current browsers and rejects when
        // playback is blocked or the media cannot be loaded. Handle that
        // here so the rejection is not left unhandled and the status does
        // not stay on "Playing...". Older browsers return undefined.
        const playback = audioPlayer.play();
        if (playback && typeof playback.catch === 'function') {
            playback.catch(error => {
                console.error('Audio playback error:', error);
                statusIndicator.textContent = 'TTS failed';
            });
        }
    })
    .catch(error => {
        hideProgress();
        console.error('TTS error:', error);
        statusIndicator.textContent = 'TTS failed';
    });
}
// Clear buttons: restore a panel's placeholder, forget its stored text and
// disable the actions that need that text.
clearSource.addEventListener('click', () => {
    currentSourceText = '';
    sourceText.innerHTML = '<p class="text-muted">Your transcribed text will appear here...</p>';
    translateBtn.disabled = true;
    playSource.disabled = true;
});
clearTranslation.addEventListener('click', () => {
    currentTranslationText = '';
    translatedText.innerHTML = '<p class="text-muted">Translation will appear here...</p>';
    playTranslation.disabled = true;
});
// Progress indicator functions

/**
 * Cancel the animation interval and the delayed hide, if either is pending.
 * Their timer ids are kept in `progressBar.dataset` (`interval`, `hideTimer`).
 */
function clearProgressTimers() {
    const interval = progressBar.dataset.interval;
    if (interval) {
        clearInterval(Number(interval));
        delete progressBar.dataset.interval;
    }
    const hideTimer = progressBar.dataset.hideTimer;
    if (hideTimer) {
        clearTimeout(Number(hideTimer));
        delete progressBar.dataset.hideTimer;
    }
}

/**
 * Show the progress bar and advance it by 5% every 100 ms until it passes 90%.
 */
function showProgress() {
    // A previous request may still be animating, or its delayed hide may be
    // pending; cancel both so they cannot fight over this bar or hide it.
    clearProgressTimers();
    progressContainer.classList.remove('d-none');
    progressBar.style.width = '0%';
    let progress = 0;
    const interval = setInterval(() => {
        progress += 5;
        if (progress > 90) {
            // Stop short of 100%; hideProgress() completes the bar.
            clearInterval(interval);
        }
        progressBar.style.width = `${progress}%`;
    }, 100);
    progressBar.dataset.interval = interval;
}

/**
 * Fill the progress bar, then hide and reset it after 500 ms.
 */
function hideProgress() {
    clearProgressTimers();
    progressBar.style.width = '100%';
    const hideTimer = setTimeout(() => {
        delete progressBar.dataset.hideTimer;
        progressContainer.classList.add('d-none');
        progressBar.style.width = '0%';
    }, 500);
    // Remembered so that a showProgress() within the delay can cancel the hide.
    progressBar.dataset.hideTimer = hideTimer;
}
});
</script>
<script src="/static/js/app.js"></script>
</body>
</html>