Add real-time language interpreter feature

This commit adds a real-time interpreter mode to the Voice Language Translator:
- Implements continuous speech-to-translation capabilities using WebSockets
- Adds background processing threads for audio, transcription, and TTS
- Adds client-side speech detection and streaming
- Updates UI to include real-time interpreter controls
- Adds necessary dependencies (flask-socketio, eventlet)
This commit is contained in:
adelorenzo
2025-04-12 00:39:16 -06:00
parent 5a7f5f04ad
commit d46df7939a
4 changed files with 666 additions and 384 deletions

View File

@@ -8,6 +8,9 @@ document.addEventListener('DOMContentLoaded', function() {
// Initialize app
initApp();
// Initialize the real-time interpreter
initRealtimeInterpreter();
// Check for PWA installation prompts
initInstallPrompt();
});
@@ -64,11 +67,6 @@ function initApp() {
const progressContainer = document.getElementById('progressContainer');
const progressBar = document.getElementById('progressBar');
const audioPlayer = document.getElementById('audioPlayer');
const ttsServerAlert = document.getElementById('ttsServerAlert');
const ttsServerMessage = document.getElementById('ttsServerMessage');
const ttsServerUrl = document.getElementById('ttsServerUrl');
const ttsApiKey = document.getElementById('ttsApiKey');
const updateTtsServer = document.getElementById('updateTtsServer');
// Set initial values
let isRecording = false;
@@ -76,52 +74,6 @@ function initApp() {
let audioChunks = [];
let currentSourceText = '';
let currentTranslationText = '';
let currentTtsServerUrl = '';
// Check TTS server status on page load
checkTtsServer();
// Check for saved translations in IndexedDB
loadSavedTranslations();
// Send a new TTS server URL and/or API key to the backend when the update
// button is clicked.
updateTtsServer.addEventListener('click', () => {
    const urlInput = ttsServerUrl.value.trim();
    const keyInput = ttsApiKey.value.trim();

    // Nothing to submit unless at least one field has been filled in.
    if (!(urlInput || keyInput)) {
        alert('Please provide at least one value to update');
        return;
    }

    // Only the fields the user actually supplied are sent.
    const payload = {};
    if (urlInput) {
        payload.server_url = urlInput;
    }
    if (keyInput) {
        payload.api_key = keyInput;
    }

    const requestOptions = {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload)
    };

    fetch('/update_tts_config', requestOptions)
        .then((response) => response.json())
        .then((result) => {
            if (!result.success) {
                alert('Failed to update TTS configuration: ' + result.error);
                return;
            }

            statusIndicator.textContent = 'TTS configuration updated';

            // Persist only the URL; the API key is deliberately not stored, for security.
            if (urlInput) {
                localStorage.setItem('ttsServerUrl', urlInput);
            }

            // Re-check the TTS server using the new configuration.
            checkTtsServer();
        })
        .catch((err) => {
            console.error('Failed to update TTS config:', err);
            alert('Failed to update TTS configuration. See console for details.');
        });
});
// Make sure target language is different from source
if (targetLanguage.options[0].value === sourceLanguage.value) {
@@ -337,29 +289,13 @@ function initApp() {
audioPlayer.play();
} else {
statusIndicator.textContent = 'TTS failed';
// Show TTS server alert with error message
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = data.error;
alert('Failed to play audio: ' + data.error);
// Check TTS server status again
checkTtsServer();
}
})
.catch(error => {
hideProgress();
console.error('TTS error:', error);
statusIndicator.textContent = 'TTS failed';
// Show TTS server alert
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = 'Failed to connect to TTS server';
});
}
@@ -377,48 +313,6 @@ function initApp() {
playTranslation.disabled = true;
});
// Ask the backend for the TTS server status and reflect it in the alert banner.
function checkTtsServer() {
    // Reveal the banner, swap its state class and set its message.
    const showBanner = (staleClass, activeClass, message) => {
        ttsServerAlert.classList.remove('d-none');
        ttsServerAlert.classList.remove(staleClass);
        ttsServerAlert.classList.add(activeClass);
        ttsServerMessage.textContent = message;
    };

    fetch('/check_tts_server')
        .then((response) => response.json())
        .then((info) => {
            // Mirror the server-reported URL into the settings field.
            currentTtsServerUrl = info.url;
            ttsServerUrl.value = currentTtsServerUrl;

            // Only a flag is kept in localStorage; it drives the masked placeholder.
            if (localStorage.getItem('ttsApiKeySet') === 'true') {
                ttsApiKey.placeholder = '••••••• (API key saved)';
            }

            const authFailed = info.status === 'auth_error';
            if (authFailed || info.status === 'error') {
                // Authentication failures get a fixed hint instead of the server message.
                const warning = authFailed
                    ? 'Authentication error with TTS server. Please check your API key.'
                    : info.message;
                showBanner('alert-success', 'alert-warning', warning);
                return;
            }

            showBanner('alert-warning', 'alert-success', 'TTS server is online and ready.');
            // The success banner hides itself again after three seconds.
            setTimeout(() => {
                ttsServerAlert.classList.add('d-none');
            }, 3000);
        })
        .catch((err) => {
            console.error('Failed to check TTS server:', err);
            showBanner('alert-success', 'alert-warning', 'Failed to check TTS server status.');
        });
}
// Progress indicator functions
function showProgress() {
progressContainer.classList.remove('d-none');
@@ -444,6 +338,394 @@ function initApp() {
progressBar.style.width = '0%';
}, 500);
}
// Run the TTS server check on page load, but only when the page actually
// contains the alert element used to display the result.
const ttsServerAlert = document.getElementById('ttsServerAlert');
if (ttsServerAlert) {
    checkTtsServer();
}

// Fetch the TTS server status from the backend and show it in the alert banner.
// Every DOM element is optional: anything missing from the page is skipped.
function checkTtsServer() {
    const messageEl = document.getElementById('ttsServerMessage');
    const urlField = document.getElementById('ttsServerUrl');

    // Show the banner with the given state; returns false when the banner
    // (or its message element) is not on the page and nothing was rendered.
    const renderBanner = (staleClass, activeClass, text) => {
        if (!ttsServerAlert || !messageEl) {
            return false;
        }
        ttsServerAlert.classList.remove('d-none');
        ttsServerAlert.classList.remove(staleClass);
        ttsServerAlert.classList.add(activeClass);
        messageEl.textContent = text;
        return true;
    };

    fetch('/check_tts_server')
        .then((response) => response.json())
        .then((info) => {
            const reportedUrl = info.url;
            if (urlField) {
                urlField.value = reportedUrl;
            }

            // Only a flag is kept in localStorage; it drives the masked placeholder.
            const keyFlag = localStorage.getItem('ttsApiKeySet');
            const keyField = document.getElementById('ttsApiKey');
            if (keyField && keyFlag === 'true') {
                keyField.placeholder = '••••••• (API key saved)';
            }

            const authFailed = info.status === 'auth_error';
            if (authFailed || info.status === 'error') {
                // Authentication failures get a fixed hint instead of the server message.
                renderBanner(
                    'alert-success',
                    'alert-warning',
                    authFailed
                        ? 'Authentication error with TTS server. Please check your API key.'
                        : info.message
                );
                return;
            }

            const shown = renderBanner('alert-warning', 'alert-success', 'TTS server is online and ready.');
            if (shown) {
                // The success banner hides itself again after three seconds.
                setTimeout(() => {
                    ttsServerAlert.classList.add('d-none');
                }, 3000);
            }
        })
        .catch((err) => {
            console.error('Failed to check TTS server:', err);
            renderBanner('alert-success', 'alert-warning', 'Failed to check TTS server status.');
        });
}
}
// Real-time interpreter module
function initRealtimeInterpreter() {
// DOM elements used by the interpreter. Each lookup is by id and yields null
// when the element is not on the page.
const realtimeBtn = document.getElementById('realtimeBtn');
const realtimeStatusIndicator = document.getElementById('realtimeStatusIndicator');
const sourceLanguage = document.getElementById('sourceLanguage');
const targetLanguage = document.getElementById('targetLanguage');
const sourceText = document.getElementById('sourceText');
const translatedText = document.getElementById('translatedText');
const audioPlayer = document.getElementById('audioPlayer');
// Shared mutable state for the nested functions below.
// socket: Socket.IO client, created lazily by connectSocket().
let socket = null;
// NOTE(review): mediaRecorder is not referenced anywhere else in
// initRealtimeInterpreter — presumably left over; confirm before removing.
let mediaRecorder = null;
// audioContext: Web Audio context, created by initAudioContext().
let audioContext = null;
// isInterpreting: true between a successful start and stopInterpreting().
let isInterpreting = false;
// sessionId: id from the server's 'session_started' event; used to ignore
// results that belong to another session.
let sessionId = null;
// Audio processing variables
// bufferSize: buffer size passed to createScriptProcessor() (samples per callback).
const bufferSize = 4096;
// audioProcessor: the script processor node whose onaudioprocess streams audio.
let audioProcessor = null;
// micStream: the MediaStream returned by getUserMedia().
let micStream = null;
/**
 * Make sure a usable Web Audio context is stored in `audioContext`.
 *
 * A context that already exists and is not closed is reused (and resumed if
 * the browser suspended it). Nothing in this module closes a context, so
 * constructing a fresh one on every interpreter start would leak contexts,
 * and browsers only allow a limited number of them per page.
 *
 * @returns {boolean} true when `audioContext` is ready for use, false when
 *     the Web Audio API is missing or the context could not be constructed.
 */
function initAudioContext() {
    // Reuse the context from an earlier session instead of leaking a new one.
    if (audioContext && audioContext.state !== 'closed') {
        if (audioContext.state === 'suspended' && typeof audioContext.resume === 'function') {
            const resumed = audioContext.resume();
            if (resumed && typeof resumed.catch === 'function') {
                resumed.catch((err) => {
                    console.error('Failed to resume audio context', err);
                });
            }
        }
        return true;
    }

    try {
        // Fall back to the prefixed constructor on older WebKit browsers.
        window.AudioContext = window.AudioContext || window.webkitAudioContext;
        audioContext = new window.AudioContext();
        return true;
    } catch (e) {
        // Also reached when neither constructor exists (`new undefined()` throws).
        console.error('Web Audio API is not supported in this browser', e);
        return false;
    }
}
/**
 * Lazily create the Socket.IO connection and register the server event
 * handlers. Does nothing when a socket object already exists, so handlers are
 * registered only once.
 */
function connectSocket() {
    if (socket) {
        return; // Socket already created
    }

    // Append one line of text to an output pane: clear the placeholder if it
    // is still showing, add the text as a paragraph and keep the pane
    // scrolled to the newest entry.
    const appendLine = (container, text) => {
        if (container.querySelector('p.text-muted')) {
            container.innerHTML = ''; // Clear placeholder
        }
        const p = document.createElement('p');
        // textContent, so text from the server is never parsed as HTML.
        p.textContent = text;
        container.appendChild(p);
        container.scrollTop = container.scrollHeight;
    };

    // Connect to the same host where the Flask app is running
    socket = io.connect(window.location.origin, {
        forceNew: true
    });

    socket.on('connect', () => {
        console.log('Socket connected');
    });

    socket.on('disconnect', () => {
        console.log('Socket disconnected');
        // Without a connection no audio can be delivered, so tear down capture.
        stopInterpreting();
    });

    socket.on('error', (data) => {
        // The payload may be missing or carry no message; never throw here.
        const message = (data && data.message) || 'Unknown error';
        console.error('Socket error:', message);
        realtimeStatusIndicator.textContent = `Error: ${message}`;
    });

    socket.on('session_started', (data) => {
        sessionId = data.session_id;
        console.log('Interpreter session started:', sessionId);
        realtimeStatusIndicator.textContent = 'Interpreter active - listening...';
    });

    socket.on('session_ended', () => {
        console.log('Interpreter session ended');
        sessionId = null;
        realtimeStatusIndicator.textContent = 'Interpreter stopped';
    });

    socket.on('chunk_received', () => {
        // Server acknowledgement for an audio chunk; intentionally a no-op.
        // Hook UI feedback in here if it is ever needed.
    });

    // Results are tagged with a session id; anything that does not belong to
    // the current session is ignored.
    socket.on('transcription_result', (data) => {
        if (data.session_id === sessionId) {
            appendLine(sourceText, data.text);
        }
    });

    socket.on('translation_result', (data) => {
        if (data.session_id === sessionId) {
            appendLine(translatedText, data.text);
        }
    });

    socket.on('audio_ready', (data) => {
        if (data.session_id !== sessionId) {
            return;
        }
        // Play the translated audio
        audioPlayer.src = data.audio_url;
        // play() returns a promise in current browsers; it rejects when
        // playback is blocked or interrupted by the next clip being loaded.
        // Handle that instead of leaving an unhandled rejection.
        const playback = audioPlayer.play();
        if (playback && typeof playback.catch === 'function') {
            playback.catch((err) => {
                console.error('Failed to play translated audio:', err);
            });
        }
    });
}
/**
 * Start the real-time interpreter: prepare the audio context and socket,
 * request the microphone, open an interpreter session on the server and
 * stream WAV-encoded audio buffers while speech is detected.
 *
 * Failures are reported through the status indicator and an alert; the
 * function itself returns nothing.
 */
function startInterpreting() {
    if (!initAudioContext()) {
        alert('Your browser does not support the Web Audio API required for the real-time interpreter.');
        return;
    }

    // navigator.mediaDevices is undefined in insecure contexts and in old
    // browsers; calling getUserMedia on it would throw synchronously and
    // bypass the promise .catch() below.
    if (!navigator.mediaDevices || typeof navigator.mediaDevices.getUserMedia !== 'function') {
        console.error('navigator.mediaDevices.getUserMedia is not available');
        realtimeStatusIndicator.textContent = 'Error: Microphone access unavailable';
        alert('Microphone access is not available in this browser. The page may need to be served over HTTPS.');
        return;
    }

    connectSocket();

    // Request microphone access
    navigator.mediaDevices.getUserMedia({ audio: true, video: false })
        .then((stream) => {
            micStream = stream;

            // Start a new interpreter session
            socket.emit('start_interpreter_session', {
                source_lang: sourceLanguage.value,
                target_lang: targetLanguage.value
            });

            // Setup audio processing.
            // NOTE: ScriptProcessorNode is deprecated in favour of AudioWorklet;
            // it is kept here to preserve the existing behaviour.
            const audioInput = audioContext.createMediaStreamSource(stream);
            audioProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);

            // The processor must be connected to a destination for
            // onaudioprocess to fire.
            audioInput.connect(audioProcessor);
            audioProcessor.connect(audioContext.destination);

            // Silence detection state
            let silenceStart = performance.now();
            let isSilent = true;
            const silenceThreshold = 0.01; // Adjust based on testing
            const silenceDelay = 1000; // 1 second of silence before stopping

            audioProcessor.onaudioprocess = function(e) {
                if (!isInterpreting) return;

                // Get audio data
                const inputData = e.inputBuffer.getChannelData(0);

                // Mean absolute amplitude of this buffer
                let sum = 0;
                for (let i = 0; i < inputData.length; i++) {
                    sum += Math.abs(inputData[i]);
                }
                const average = sum / inputData.length;

                // Detect speech vs silence
                if (average > silenceThreshold) {
                    if (isSilent) {
                        isSilent = false;
                        realtimeStatusIndicator.textContent = 'Interpreting...';
                    }
                    silenceStart = performance.now(); // Reset silence timer
                } else if (!isSilent && (performance.now() - silenceStart > silenceDelay)) {
                    isSilent = true;
                    realtimeStatusIndicator.textContent = 'Waiting for speech...';
                }

                // Send to server if not silent or within silence delay.
                // The WAV encoding is done only for chunks that are actually
                // sent, so silent buffers cost nothing beyond the level check.
                if (!isSilent || (performance.now() - silenceStart <= silenceDelay)) {
                    socket.emit('audio_chunk', {
                        audio: convertToWav(inputData)
                    });
                }
            };

            // Update UI
            isInterpreting = true;
            realtimeBtn.textContent = 'Stop Interpreter';
            realtimeBtn.classList.replace('btn-primary', 'btn-danger');
            realtimeStatusIndicator.textContent = 'Interpreter active - listening...';

            // Disable language selectors during interpretation
            sourceLanguage.disabled = true;
            targetLanguage.disabled = true;
        })
        .catch((error) => {
            console.error('Error accessing microphone:', error);

            // If the microphone was acquired but setup failed before the
            // interpreter became active, release it here: stopInterpreting()
            // is a no-op while isInterpreting is false, so the capture would
            // otherwise stay open.
            if (!isInterpreting) {
                if (audioProcessor) {
                    audioProcessor.disconnect();
                    audioProcessor = null;
                }
                if (micStream) {
                    micStream.getTracks().forEach((track) => track.stop());
                    micStream = null;
                }
            }

            realtimeStatusIndicator.textContent = 'Error: Microphone access denied';
            alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
        });
}
/**
 * Stop the real-time interpreter: tear down audio capture, tell the server
 * the session is over and put the controls back into their idle state.
 * Calling it while the interpreter is not running does nothing.
 */
function stopInterpreting() {
    if (isInterpreting) {
        // Detach the processing node so no further audio callbacks are wired up.
        if (audioProcessor) {
            audioProcessor.disconnect();
            audioProcessor = null;
        }

        // Release the microphone.
        if (micStream) {
            for (const track of micStream.getTracks()) {
                track.stop();
            }
            micStream = null;
        }

        // Notify the server only while the connection is still up.
        if (socket) {
            if (socket.connected) {
                socket.emit('end_interpreter_session');
            }
        }

        // Back to the idle UI.
        isInterpreting = false;
        realtimeBtn.textContent = 'Start Interpreter';
        realtimeBtn.classList.replace('btn-danger', 'btn-primary');
        realtimeStatusIndicator.textContent = 'Interpreter ready';

        // Language choice is unlocked again once interpretation has stopped.
        sourceLanguage.disabled = false;
        targetLanguage.disabled = false;
    }
}
/**
 * Encode a block of float samples as a complete mono 16-bit PCM WAV file.
 *
 * The sample rate written into the header is read from `audioContext`.
 *
 * @param {Float32Array|number[]} audioBuffer - samples; values outside
 *     [-1, 1] are clamped before conversion.
 * @returns {Uint8Array} 44-byte RIFF/WAVE header followed by the samples as
 *     little-endian signed 16-bit integers.
 */
function convertToWav(audioBuffer) {
    const HEADER_BYTES = 44;
    const BYTES_PER_SAMPLE = 2; // 16-bit PCM

    // Sample parameters
    const sampleRate = audioContext.sampleRate;
    const bitsPerSample = 16;
    const channels = 1;
    const blockAlign = channels * BYTES_PER_SAMPLE;
    const dataLength = audioBuffer.length * BYTES_PER_SAMPLE;

    // Header and samples are written into one buffer, so no copy is needed.
    const wav = new ArrayBuffer(HEADER_BYTES + dataLength);
    const view = new DataView(wav);

    const writeString = (offset, text) => {
        for (let i = 0; i < text.length; i++) {
            view.setUint8(offset + i, text.charCodeAt(i));
        }
    };

    // RIFF identifier
    writeString(0, 'RIFF');
    // RIFF chunk size: everything after this field, i.e. total size minus the
    // 8 bytes of 'RIFF' + size = 36 + dataLength. (Writing 32 + dataLength
    // makes strict parsers drop the last 4 bytes of audio.)
    view.setUint32(4, HEADER_BYTES - 8 + dataLength, true);
    // RIFF type
    writeString(8, 'WAVE');
    // Format chunk identifier
    writeString(12, 'fmt ');
    // Format chunk length
    view.setUint32(16, 16, true);
    // Sample format: 1 = uncompressed PCM
    view.setUint16(20, 1, true);
    // Channel count
    view.setUint16(22, channels, true);
    // Sample rate
    view.setUint32(24, sampleRate, true);
    // Byte rate (sample rate * block align)
    view.setUint32(28, sampleRate * blockAlign, true);
    // Block align (channel count * bytes per sample)
    view.setUint16(32, blockAlign, true);
    // Bits per sample
    view.setUint16(34, bitsPerSample, true);
    // Data chunk identifier
    writeString(36, 'data');
    // Data chunk length
    view.setUint32(40, dataLength, true);

    // Convert float32 to int16. Negative and positive halves scale
    // differently because the int16 range is asymmetric (-32768..32767).
    let offset = HEADER_BYTES;
    for (let i = 0; i < audioBuffer.length; i++) {
        const s = Math.max(-1, Math.min(1, audioBuffer[i]));
        view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        offset += BYTES_PER_SAMPLE;
    }

    return new Uint8Array(wav);
}
// Toggle interpreter on button click.
// The button is looked up by id and is null on pages without the interpreter
// controls; dereferencing it unguarded would throw and abort the caller's
// remaining initialisation, so the handler is attached only when it exists.
if (realtimeBtn) {
    realtimeBtn.addEventListener('click', function() {
        if (isInterpreting) {
            stopInterpreting();
        } else {
            startInterpreting();
        }
    });
} else {
    console.warn('Real-time interpreter button (#realtimeBtn) not found; interpreter controls are disabled');
}

// Cleanup when the page is unloaded: stop capture and close the socket.
window.addEventListener('beforeunload', function() {
    stopInterpreting();
    if (socket) {
        socket.disconnect();
    }
});

// Initialize status (skipped when the indicator is not on the page)
if (realtimeStatusIndicator) {
    realtimeStatusIndicator.textContent = 'Interpreter ready';
}
}
// IndexedDB functions for offline data storage