Add real-time language interpreter feature
This commit adds a real-time interpreter mode to the Voice Language Translator: - Implements continuous speech-to-translation capabilities using WebSockets - Adds background processing threads for audio, transcription, and TTS - Adds client-side speech detection and streaming - Updates UI to include real-time interpreter controls - Adds necessary dependencies (flask-socketio, eventlet)
This commit is contained in:
500
static/js/app.js
500
static/js/app.js
document.addEventListener('DOMContentLoaded', function() {
  // Bootstrap the three independent app modules once the DOM is ready.
  initApp();

  // Initialize the real-time interpreter
  initRealtimeInterpreter();

  // Check for PWA installation prompts
  initInstallPrompt();
});
|
||||
@@ -64,11 +67,6 @@ function initApp() {
|
||||
const progressContainer = document.getElementById('progressContainer');
|
||||
const progressBar = document.getElementById('progressBar');
|
||||
const audioPlayer = document.getElementById('audioPlayer');
|
||||
const ttsServerAlert = document.getElementById('ttsServerAlert');
|
||||
const ttsServerMessage = document.getElementById('ttsServerMessage');
|
||||
const ttsServerUrl = document.getElementById('ttsServerUrl');
|
||||
const ttsApiKey = document.getElementById('ttsApiKey');
|
||||
const updateTtsServer = document.getElementById('updateTtsServer');
|
||||
|
||||
// Set initial values
|
||||
let isRecording = false;
|
||||
@@ -76,52 +74,6 @@ function initApp() {
|
||||
let audioChunks = [];
|
||||
let currentSourceText = '';
|
||||
let currentTranslationText = '';
|
||||
let currentTtsServerUrl = '';
|
||||
|
||||
// Check TTS server status on page load
|
||||
checkTtsServer();
|
||||
|
||||
// Check for saved translations in IndexedDB
|
||||
loadSavedTranslations();
|
||||
|
||||
// Update TTS server URL and API key
updateTtsServer.addEventListener('click', function() {
  const newUrl = ttsServerUrl.value.trim();
  const newApiKey = ttsApiKey.value.trim();

  // Require at least one field; both are optional individually.
  if (!newUrl && !newApiKey) {
    alert('Please provide at least one value to update');
    return;
  }

  // Only send the fields the user actually changed.
  const updateData = {};
  if (newUrl) updateData.server_url = newUrl;
  if (newApiKey) updateData.api_key = newApiKey;

  fetch('/update_tts_config', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(updateData)
  })
    .then(response => response.json())
    .then(data => {
      if (data.success) {
        statusIndicator.textContent = 'TTS configuration updated';
        // Save URL to localStorage but not the API key for security
        if (newUrl) localStorage.setItem('ttsServerUrl', newUrl);
        // Record that a key was saved so checkTtsServer() can show its
        // "API key saved" placeholder — it reads 'ttsApiKeySet', which was
        // previously never written anywhere.
        if (newApiKey) localStorage.setItem('ttsApiKeySet', 'true');
        // Check TTS server with new configuration
        checkTtsServer();
      } else {
        alert('Failed to update TTS configuration: ' + data.error);
      }
    })
    .catch(error => {
      console.error('Failed to update TTS config:', error);
      alert('Failed to update TTS configuration. See console for details.');
    });
});
|
||||
|
||||
// Make sure target language is different from source
|
||||
if (targetLanguage.options[0].value === sourceLanguage.value) {
|
||||
@@ -337,29 +289,13 @@ function initApp() {
|
||||
audioPlayer.play();
|
||||
} else {
|
||||
statusIndicator.textContent = 'TTS failed';
|
||||
|
||||
// Show TTS server alert with error message
|
||||
ttsServerAlert.classList.remove('d-none');
|
||||
ttsServerAlert.classList.remove('alert-success');
|
||||
ttsServerAlert.classList.add('alert-warning');
|
||||
ttsServerMessage.textContent = data.error;
|
||||
|
||||
alert('Failed to play audio: ' + data.error);
|
||||
|
||||
// Check TTS server status again
|
||||
checkTtsServer();
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
hideProgress();
|
||||
console.error('TTS error:', error);
|
||||
statusIndicator.textContent = 'TTS failed';
|
||||
|
||||
// Show TTS server alert
|
||||
ttsServerAlert.classList.remove('d-none');
|
||||
ttsServerAlert.classList.remove('alert-success');
|
||||
ttsServerAlert.classList.add('alert-warning');
|
||||
ttsServerMessage.textContent = 'Failed to connect to TTS server';
|
||||
});
|
||||
}
|
||||
|
||||
@@ -377,48 +313,6 @@ function initApp() {
|
||||
playTranslation.disabled = true;
|
||||
});
|
||||
|
||||
// Function to check TTS server status
|
||||
function checkTtsServer() {
|
||||
fetch('/check_tts_server')
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
currentTtsServerUrl = data.url;
|
||||
ttsServerUrl.value = currentTtsServerUrl;
|
||||
|
||||
// Load saved API key if available
|
||||
const savedApiKey = localStorage.getItem('ttsApiKeySet');
|
||||
if (savedApiKey === 'true') {
|
||||
ttsApiKey.placeholder = '••••••• (API key saved)';
|
||||
}
|
||||
|
||||
if (data.status === 'error' || data.status === 'auth_error') {
|
||||
ttsServerAlert.classList.remove('d-none');
|
||||
ttsServerAlert.classList.remove('alert-success');
|
||||
ttsServerAlert.classList.add('alert-warning');
|
||||
ttsServerMessage.textContent = data.message;
|
||||
|
||||
if (data.status === 'auth_error') {
|
||||
ttsServerMessage.textContent = 'Authentication error with TTS server. Please check your API key.';
|
||||
}
|
||||
} else {
|
||||
ttsServerAlert.classList.remove('d-none');
|
||||
ttsServerAlert.classList.remove('alert-warning');
|
||||
ttsServerAlert.classList.add('alert-success');
|
||||
ttsServerMessage.textContent = 'TTS server is online and ready.';
|
||||
setTimeout(() => {
|
||||
ttsServerAlert.classList.add('d-none');
|
||||
}, 3000);
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Failed to check TTS server:', error);
|
||||
ttsServerAlert.classList.remove('d-none');
|
||||
ttsServerAlert.classList.remove('alert-success');
|
||||
ttsServerAlert.classList.add('alert-warning');
|
||||
ttsServerMessage.textContent = 'Failed to check TTS server status.';
|
||||
});
|
||||
}
|
||||
|
||||
// Progress indicator functions
|
||||
function showProgress() {
|
||||
progressContainer.classList.remove('d-none');
|
||||
@@ -444,6 +338,394 @@ function initApp() {
|
||||
progressBar.style.width = '0%';
|
||||
}, 500);
|
||||
}
|
||||
|
||||
// On page load, probe the TTS server — but only when this page actually
// renders the alert element the probe reports into.
const ttsServerAlert = document.getElementById('ttsServerAlert');
if (ttsServerAlert !== null) {
  checkTtsServer();
}
|
||||
|
||||
// Query the backend for TTS server status and reflect the result in the
// alert banner, the server-URL input, and the API-key placeholder.
// (Declared with `function` so the page-load call above can rely on hoisting.)
function checkTtsServer() {
  const ttsServerMessage = document.getElementById('ttsServerMessage');
  const ttsServerUrl = document.getElementById('ttsServerUrl');

  fetch('/check_tts_server')
    .then((response) => response.json())
    .then((data) => {
      // Mirror the configured server URL into the settings field.
      if (ttsServerUrl) ttsServerUrl.value = data.url;

      // Hint that an API key is already stored (the key itself is never kept).
      const ttsApiKey = document.getElementById('ttsApiKey');
      if (ttsApiKey && localStorage.getItem('ttsApiKeySet') === 'true') {
        ttsApiKey.placeholder = '••••••• (API key saved)';
      }

      if (ttsServerAlert && ttsServerMessage) {
        const failed = data.status === 'error' || data.status === 'auth_error';
        ttsServerAlert.classList.remove('d-none');
        ttsServerAlert.classList.remove(failed ? 'alert-success' : 'alert-warning');
        ttsServerAlert.classList.add(failed ? 'alert-warning' : 'alert-success');

        if (failed) {
          ttsServerMessage.textContent = data.status === 'auth_error'
            ? 'Authentication error with TTS server. Please check your API key.'
            : data.message;
        } else {
          ttsServerMessage.textContent = 'TTS server is online and ready.';
          // Auto-dismiss the success banner after a short delay.
          setTimeout(() => {
            ttsServerAlert.classList.add('d-none');
          }, 3000);
        }
      }
    })
    .catch((error) => {
      console.error('Failed to check TTS server:', error);
      if (ttsServerAlert && ttsServerMessage) {
        ttsServerAlert.classList.remove('d-none');
        ttsServerAlert.classList.remove('alert-success');
        ttsServerAlert.classList.add('alert-warning');
        ttsServerMessage.textContent = 'Failed to check TTS server status.';
      }
    });
}
|
||||
}
|
||||
|
||||
// Real-time interpreter module
|
||||
function initRealtimeInterpreter() {
|
||||
// DOM elements
|
||||
const realtimeBtn = document.getElementById('realtimeBtn');
|
||||
const realtimeStatusIndicator = document.getElementById('realtimeStatusIndicator');
|
||||
const sourceLanguage = document.getElementById('sourceLanguage');
|
||||
const targetLanguage = document.getElementById('targetLanguage');
|
||||
const sourceText = document.getElementById('sourceText');
|
||||
const translatedText = document.getElementById('translatedText');
|
||||
const audioPlayer = document.getElementById('audioPlayer');
|
||||
|
||||
// SocketIO connection
|
||||
let socket = null;
|
||||
let mediaRecorder = null;
|
||||
let audioContext = null;
|
||||
let isInterpreting = false;
|
||||
let sessionId = null;
|
||||
|
||||
// Audio processing variables
|
||||
const bufferSize = 4096;
|
||||
let audioProcessor = null;
|
||||
let micStream = null;
|
||||
|
||||
// Initialize the audio context
|
||||
function initAudioContext() {
|
||||
try {
|
||||
window.AudioContext = window.AudioContext || window.webkitAudioContext;
|
||||
audioContext = new AudioContext();
|
||||
return true;
|
||||
} catch (e) {
|
||||
console.error('Web Audio API is not supported in this browser', e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Connect to Socket.IO server
|
||||
function connectSocket() {
|
||||
if (socket) {
|
||||
return; // Already connected
|
||||
}
|
||||
|
||||
// Connect to the same host where the Flask app is running
|
||||
socket = io.connect(window.location.origin, {
|
||||
forceNew: true
|
||||
});
|
||||
|
||||
// Socket event handlers
|
||||
socket.on('connect', () => {
|
||||
console.log('Socket connected');
|
||||
});
|
||||
|
||||
socket.on('disconnect', () => {
|
||||
console.log('Socket disconnected');
|
||||
stopInterpreting();
|
||||
});
|
||||
|
||||
socket.on('error', (data) => {
|
||||
console.error('Socket error:', data.message);
|
||||
realtimeStatusIndicator.textContent = `Error: ${data.message}`;
|
||||
});
|
||||
|
||||
socket.on('session_started', (data) => {
|
||||
sessionId = data.session_id;
|
||||
console.log('Interpreter session started:', sessionId);
|
||||
realtimeStatusIndicator.textContent = 'Interpreter active - listening...';
|
||||
});
|
||||
|
||||
socket.on('session_ended', () => {
|
||||
console.log('Interpreter session ended');
|
||||
sessionId = null;
|
||||
realtimeStatusIndicator.textContent = 'Interpreter stopped';
|
||||
});
|
||||
|
||||
socket.on('chunk_received', () => {
|
||||
// This is a confirmation that the server received our audio chunk
|
||||
// We can use this to update UI if needed
|
||||
});
|
||||
|
||||
socket.on('transcription_result', (data) => {
|
||||
if (data.session_id === sessionId) {
|
||||
// Update source text with transcription
|
||||
if (sourceText.querySelector('p.text-muted')) {
|
||||
sourceText.innerHTML = ''; // Clear placeholder
|
||||
}
|
||||
|
||||
const p = document.createElement('p');
|
||||
p.textContent = data.text;
|
||||
sourceText.appendChild(p);
|
||||
|
||||
// Auto-scroll to bottom
|
||||
sourceText.scrollTop = sourceText.scrollHeight;
|
||||
}
|
||||
});
|
||||
|
||||
socket.on('translation_result', (data) => {
|
||||
if (data.session_id === sessionId) {
|
||||
// Update translated text
|
||||
if (translatedText.querySelector('p.text-muted')) {
|
||||
translatedText.innerHTML = ''; // Clear placeholder
|
||||
}
|
||||
|
||||
const p = document.createElement('p');
|
||||
p.textContent = data.text;
|
||||
translatedText.appendChild(p);
|
||||
|
||||
// Auto-scroll to bottom
|
||||
translatedText.scrollTop = translatedText.scrollHeight;
|
||||
}
|
||||
});
|
||||
|
||||
socket.on('audio_ready', (data) => {
|
||||
if (data.session_id === sessionId) {
|
||||
// Play the translated audio
|
||||
audioPlayer.src = data.audio_url;
|
||||
audioPlayer.play();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Start the real-time interpreter
|
||||
function startInterpreting() {
|
||||
if (!initAudioContext()) {
|
||||
alert('Your browser does not support the Web Audio API required for the real-time interpreter.');
|
||||
return;
|
||||
}
|
||||
|
||||
connectSocket();
|
||||
|
||||
// Request microphone access
|
||||
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
|
||||
.then((stream) => {
|
||||
micStream = stream;
|
||||
|
||||
// Start a new interpreter session
|
||||
socket.emit('start_interpreter_session', {
|
||||
source_lang: sourceLanguage.value,
|
||||
target_lang: targetLanguage.value
|
||||
});
|
||||
|
||||
// Setup audio processing
|
||||
const audioInput = audioContext.createMediaStreamSource(stream);
|
||||
audioProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
|
||||
|
||||
// Connect the audio processing node
|
||||
audioInput.connect(audioProcessor);
|
||||
audioProcessor.connect(audioContext.destination);
|
||||
|
||||
// Process audio data
|
||||
let silenceStart = performance.now();
|
||||
let isSilent = true;
|
||||
const silenceThreshold = 0.01; // Adjust based on testing
|
||||
const silenceDelay = 1000; // 1 second of silence before stopping
|
||||
|
||||
audioProcessor.onaudioprocess = function(e) {
|
||||
if (!isInterpreting) return;
|
||||
|
||||
// Get audio data
|
||||
const inputData = e.inputBuffer.getChannelData(0);
|
||||
|
||||
// Check for silence
|
||||
let sum = 0;
|
||||
for (let i = 0; i < inputData.length; i++) {
|
||||
sum += Math.abs(inputData[i]);
|
||||
}
|
||||
const average = sum / inputData.length;
|
||||
|
||||
// Detect speech vs silence
|
||||
if (average > silenceThreshold) {
|
||||
if (isSilent) {
|
||||
isSilent = false;
|
||||
realtimeStatusIndicator.textContent = 'Interpreting...';
|
||||
}
|
||||
silenceStart = performance.now(); // Reset silence timer
|
||||
} else if (!isSilent && (performance.now() - silenceStart > silenceDelay)) {
|
||||
isSilent = true;
|
||||
realtimeStatusIndicator.textContent = 'Waiting for speech...';
|
||||
}
|
||||
|
||||
// Convert buffer to WAV format
|
||||
const wavBuffer = convertToWav(inputData);
|
||||
|
||||
// Send to server if not silent or within silence delay
|
||||
if (!isSilent || (performance.now() - silenceStart <= silenceDelay)) {
|
||||
socket.emit('audio_chunk', {
|
||||
audio: wavBuffer
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Update UI
|
||||
isInterpreting = true;
|
||||
realtimeBtn.textContent = 'Stop Interpreter';
|
||||
realtimeBtn.classList.replace('btn-primary', 'btn-danger');
|
||||
realtimeStatusIndicator.textContent = 'Interpreter active - listening...';
|
||||
|
||||
// Disable language selectors during interpretation
|
||||
sourceLanguage.disabled = true;
|
||||
targetLanguage.disabled = true;
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Error accessing microphone:', error);
|
||||
realtimeStatusIndicator.textContent = 'Error: Microphone access denied';
|
||||
alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
|
||||
});
|
||||
}
|
||||
|
||||
// Stop the real-time interpreter
|
||||
function stopInterpreting() {
|
||||
if (!isInterpreting) return;
|
||||
|
||||
// Stop audio processing
|
||||
if (audioProcessor) {
|
||||
audioProcessor.disconnect();
|
||||
audioProcessor = null;
|
||||
}
|
||||
|
||||
// Stop microphone stream
|
||||
if (micStream) {
|
||||
micStream.getTracks().forEach(track => track.stop());
|
||||
micStream = null;
|
||||
}
|
||||
|
||||
// End the interpreter session
|
||||
if (socket && socket.connected) {
|
||||
socket.emit('end_interpreter_session');
|
||||
}
|
||||
|
||||
// Update UI
|
||||
isInterpreting = false;
|
||||
realtimeBtn.textContent = 'Start Interpreter';
|
||||
realtimeBtn.classList.replace('btn-danger', 'btn-primary');
|
||||
realtimeStatusIndicator.textContent = 'Interpreter ready';
|
||||
|
||||
// Re-enable language selectors
|
||||
sourceLanguage.disabled = false;
|
||||
targetLanguage.disabled = false;
|
||||
}
|
||||
|
||||
// Convert audio buffer to WAV format
|
||||
function convertToWav(audioBuffer) {
|
||||
// Simple WAV header creation
|
||||
const createWavHeader = function(sampleRate, bitsPerSample, channels, dataLength) {
|
||||
const headerLength = 44;
|
||||
const wavHeader = new ArrayBuffer(headerLength);
|
||||
const view = new DataView(wavHeader);
|
||||
|
||||
// RIFF identifier
|
||||
writeString(view, 0, 'RIFF');
|
||||
// File length
|
||||
view.setUint32(4, 32 + dataLength, true);
|
||||
// RIFF type
|
||||
writeString(view, 8, 'WAVE');
|
||||
// Format chunk identifier
|
||||
writeString(view, 12, 'fmt ');
|
||||
// Format chunk length
|
||||
view.setUint32(16, 16, true);
|
||||
// Sample format (raw)
|
||||
view.setUint16(20, 1, true);
|
||||
// Channel count
|
||||
view.setUint16(22, channels, true);
|
||||
// Sample rate
|
||||
view.setUint32(24, sampleRate, true);
|
||||
// Byte rate (sample rate * block align)
|
||||
view.setUint32(28, sampleRate * channels * (bitsPerSample / 8), true);
|
||||
// Block align (channel count * bytes per sample)
|
||||
view.setUint16(32, channels * (bitsPerSample / 8), true);
|
||||
// Bits per sample
|
||||
view.setUint16(34, bitsPerSample, true);
|
||||
// Data chunk identifier
|
||||
writeString(view, 36, 'data');
|
||||
// Data chunk length
|
||||
view.setUint32(40, dataLength, true);
|
||||
|
||||
return wavHeader;
|
||||
};
|
||||
|
||||
const writeString = function(view, offset, string) {
|
||||
for (let i = 0; i < string.length; i++) {
|
||||
view.setUint8(offset + i, string.charCodeAt(i));
|
||||
}
|
||||
};
|
||||
|
||||
// Sample parameters
|
||||
const sampleRate = audioContext.sampleRate;
|
||||
const bitsPerSample = 16;
|
||||
const channels = 1;
|
||||
|
||||
// Create the audio data buffer
|
||||
const dataLength = audioBuffer.length * 2; // 16-bit = 2 bytes per sample
|
||||
const audioData = new ArrayBuffer(dataLength);
|
||||
const dataView = new DataView(audioData);
|
||||
|
||||
// Convert float32 to int16
|
||||
let offset = 0;
|
||||
for (let i = 0; i < audioBuffer.length; i++) {
|
||||
const s = Math.max(-1, Math.min(1, audioBuffer[i]));
|
||||
dataView.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
|
||||
offset += 2;
|
||||
}
|
||||
|
||||
// Create the WAV header
|
||||
const header = createWavHeader(sampleRate, bitsPerSample, channels, dataLength);
|
||||
|
||||
// Combine header and audio data
|
||||
const wavBuffer = new Uint8Array(header.byteLength + audioData.byteLength);
|
||||
wavBuffer.set(new Uint8Array(header), 0);
|
||||
wavBuffer.set(new Uint8Array(audioData), header.byteLength);
|
||||
|
||||
return wavBuffer;
|
||||
}
|
||||
|
||||
// Toggle interpreter on button click
|
||||
realtimeBtn.addEventListener('click', function() {
|
||||
if (isInterpreting) {
|
||||
stopInterpreting();
|
||||
} else {
|
||||
startInterpreting();
|
||||
}
|
||||
});
|
||||
|
||||
// Cleanup function for when the page is unloaded
|
||||
window.addEventListener('beforeunload', function() {
|
||||
stopInterpreting();
|
||||
if (socket) {
|
||||
socket.disconnect();
|
||||
}
|
||||
});
|
||||
|
||||
// Initialize status
|
||||
realtimeStatusIndicator.textContent = 'Interpreter ready';
|
||||
}
|
||||
|
||||
// IndexedDB functions for offline data storage
|
||||
|
Reference in New Issue
Block a user