Compare commits


5 Commits

Author SHA1 Message Date
4bd56cbe5b Improve audio handling compatibility
- Add support for different audio data formats
- Ensure proper .wav extension for Whisper compatibility 
- Add file size logging for debugging
- Fix data format conversion between client and server
2025-04-12 01:51:15 -06:00
378f976b76 Fix real-time interpreter audio processing
- Add better logging for audio chunk processing
- Increase audio buffer size for better speech detection
- Improve WAV format creation for better Whisper compatibility
- Lower silence threshold for better speech detection
- Extend silence timeout from 1 to 2 seconds
2025-04-12 01:50:39 -06:00
ae8eaaacc0 whisper model to base 2025-04-12 01:10:28 -06:00
adelorenzo d46df7939a Add real-time language interpreter feature
This commit adds a real-time interpreter mode to the Voice Language Translator:
- Implements continuous speech-to-translation capabilities using WebSockets
- Adds background processing threads for audio, transcription, and TTS
- Adds client-side speech detection and streaming
- Updates UI to include real-time interpreter controls
- Adds necessary dependencies (flask-socketio, eventlet)
2025-04-12 00:39:16 -06:00
adelorenzo 5a7f5f04ad Add real-time language interpreter feature
This commit adds a real-time language interpreter feature to the Voice Language Translator app, including:
- Socket.IO integration for real-time communication
- Background processing threads for audio, transcription, and TTS
- Continuous speech detection and interpretation
- Client-side Web Audio API integration
- UI components for real-time interpretation mode
2025-04-12 00:30:36 -06:00
4 changed files with 689 additions and 385 deletions
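Commits d46df7939a and 5a7f5f04ad describe the server-side architecture added below: queues feeding background threads for audio, transcription, and TTS. Before reading the full app.py diff, here is a minimal, self-contained sketch of that worker-queue pattern; the stage names and stub work functions are illustrative, not code from the repository.

```python
import queue
import threading


def start_stage(in_q, out_q, work):
    """Run a daemon thread that maps work() over items from in_q into out_q."""
    def loop():
        while True:
            item = in_q.get()
            if item is None:  # poison pill: shut this stage down
                in_q.task_done()
                break
            try:
                result = work(item)
                if out_q is not None and result is not None:
                    out_q.put(result)
            finally:
                in_q.task_done()

    t = threading.Thread(target=loop, daemon=True)
    t.start()
    return t


# Wiring that mirrors the audio -> transcription -> translation/TTS chain
audio_q, text_q, speech_q = queue.Queue(), queue.Queue(), queue.Queue()
start_stage(audio_q, text_q, lambda chunk: f"transcript of {len(chunk)} bytes")  # stands in for Whisper
start_stage(text_q, speech_q, lambda text: text.upper())                         # stands in for Ollama + TTS

audio_q.put(b"\x00" * 16384)        # pretend audio chunk
print(speech_q.get(timeout=5))      # result emerges from the last stage
```

In app.py each stage follows this shape, with Whisper, Ollama, and the TTS HTTP request taking the place of work(), and results pushed back to the browser via Socket.IO events.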

app.py
View File

@@ -4,7 +4,10 @@ import tempfile
import requests
import json
import logging
import threading
import queue
from flask import Flask, render_template, request, jsonify, Response, send_file, send_from_directory
from flask_socketio import SocketIO, emit
import whisper
import torch
import ollama
@@ -18,6 +21,9 @@ app.config['UPLOAD_FOLDER'] = tempfile.mkdtemp()
app.config['TTS_SERVER'] = os.environ.get('TTS_SERVER_URL', 'http://localhost:5050/v1/audio/speech')
app.config['TTS_API_KEY'] = os.environ.get('TTS_API_KEY', '56461d8b44607f2cfcb8030dee313a8e')
# Initialize Flask-SocketIO
socketio = SocketIO(app, cors_allowed_origins="*")
@app.route('/<path:filename>')
def root_files(filename):
# Check if requested file is one of the common icon filenames
@@ -178,7 +184,7 @@ def update_tts_config():
# Load Whisper model
logger.info("Loading Whisper model...")
-whisper_model = whisper.load_model("medium")
+whisper_model = whisper.load_model("base")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
whisper_model = whisper_model.to(device)
logger.info("Whisper model loaded successfully")
@@ -393,5 +399,275 @@ def get_audio(filename):
logger.error(f"Audio retrieval error: {str(e)}")
return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500
# Real-time interpreter functions
def setup_realtime_interpreter():
global whisper_model
logger.info("Setting up realtime interpreter...")
# Create processing queues
audio_queue = queue.Queue()
transcription_queue = queue.Queue()
translation_queue = queue.Queue()
# Audio processing thread function
def process_audio_chunks():
while True:
try:
audio_data, source_lang, session_id = audio_queue.get(timeout=1)
if audio_data is None: # Poison pill to stop thread
break
# Log received audio chunk size
logger.info(f"Processing audio chunk: {len(audio_data)} bytes for session {session_id}")
# Save audio chunk to a temporary file
temp_path = os.path.join(app.config['UPLOAD_FOLDER'], f'chunk_{session_id}_{int(time.time())}.wav')
with open(temp_path, 'wb') as f:
f.write(audio_data)
logger.info(f"Saved audio chunk to {temp_path}, starting transcription")
# Use whisper to transcribe
try:
# Check if file is valid
file_size = os.path.getsize(temp_path)
logger.info(f"File size: {file_size} bytes")
# Enforce .wav extension for compatibility
new_temp_path = temp_path
if not temp_path.lower().endswith('.wav'):
new_temp_path = temp_path + '.wav'
os.rename(temp_path, new_temp_path)
logger.info(f"Renamed audio file to ensure .wav extension: {new_temp_path}")
temp_path = new_temp_path
result = whisper_model.transcribe(
temp_path,
language=LANGUAGE_TO_CODE.get(source_lang, None)
)
# Log the result length to see if we got anything
text_result = result["text"].strip()
logger.info(f"Transcription result: {len(text_result)} characters")
# If we got a meaningful result, send it for translation
if text_result:
logger.info(f"Transcription: '{text_result}'")
transcription_queue.put((text_result, source_lang, session_id))
# Emit transcription result to client
socketio.emit('transcription_result', {
'text': text_result,
'session_id': session_id
})
else:
logger.info("No transcription text detected in the audio")
except Exception as e:
logger.error(f"Error transcribing audio chunk: {str(e)}")
# Clean up temp file
if os.path.exists(temp_path):
os.remove(temp_path)
audio_queue.task_done()
except queue.Empty:
continue
except Exception as e:
logger.error(f"Error in audio processing thread: {str(e)}")
# Translation processing thread function
def process_transcriptions():
while True:
try:
text, source_lang, session_id = transcription_queue.get(timeout=1)
if text is None: # Poison pill to stop thread
break
# Get the target language from the active sessions
target_lang = active_sessions.get(session_id, {}).get('target_lang', 'English')
# Create a prompt for Gemma 3 translation
prompt = f"""
Translate the following text from {source_lang} to {target_lang}:
"{text}"
Provide only the translation without any additional text.
"""
# Use Ollama to interact with Gemma 3
try:
response = ollama.chat(
model="gemma3:27b",
messages=[
{
"role": "user",
"content": prompt
}
]
)
translated_text = response['message']['content'].strip()
translation_queue.put((translated_text, target_lang, session_id))
# Emit translation result to client
socketio.emit('translation_result', {
'text': translated_text,
'session_id': session_id
})
except Exception as e:
logger.error(f"Error translating text: {str(e)}")
transcription_queue.task_done()
except queue.Empty:
continue
except Exception as e:
logger.error(f"Error in translation thread: {str(e)}")
# TTS processing thread function
def process_translations():
while True:
try:
text, language, session_id = translation_queue.get(timeout=1)
if text is None: # Poison pill to stop thread
break
voice = LANGUAGE_TO_VOICE.get(language, 'echo')
# Get TTS server URL and API key from config
tts_server_url = app.config['TTS_SERVER']
tts_api_key = app.config['TTS_API_KEY']
# Request TTS from the OpenAI Edge TTS server
try:
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {tts_api_key}"
}
# Proper OpenAI TTS payload
payload = {
"input": text,
"voice": voice,
"response_format": "mp3",
"speed": 1.0
}
tts_response = requests.post(
tts_server_url,
headers=headers,
json=payload,
timeout=15
)
if tts_response.status_code == 200:
# Save audio temporarily
temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{session_id}_{int(time.time())}.mp3')
with open(temp_audio_path, 'wb') as f:
f.write(tts_response.content)
# Emit audio URL to client
audio_url = f'/get_audio/{os.path.basename(temp_audio_path)}'
socketio.emit('audio_ready', {
'audio_url': audio_url,
'session_id': session_id
})
except Exception as e:
logger.error(f"Error generating TTS: {str(e)}")
translation_queue.task_done()
except queue.Empty:
continue
except Exception as e:
logger.error(f"Error in TTS thread: {str(e)}")
# Start worker threads
threading.Thread(target=process_audio_chunks, daemon=True).start()
threading.Thread(target=process_transcriptions, daemon=True).start()
threading.Thread(target=process_translations, daemon=True).start()
logger.info("Realtime interpreter threads started")
return audio_queue
# Dictionary to store active interpretation sessions
active_sessions = {}
# Socket.IO event handlers
@socketio.on('connect')
def handle_connect():
logger.info(f"Client connected: {request.sid}")
@socketio.on('disconnect')
def handle_disconnect():
logger.info(f"Client disconnected: {request.sid}")
# Clean up any session data
if request.sid in active_sessions:
del active_sessions[request.sid]
@socketio.on('start_interpreter_session')
def handle_start_session(data):
source_lang = data.get('source_lang', 'English')
target_lang = data.get('target_lang', 'Spanish')
# Store session info
active_sessions[request.sid] = {
'source_lang': source_lang,
'target_lang': target_lang,
'start_time': time.time()
}
logger.info(f"Started interpreter session {request.sid}: {source_lang} -> {target_lang}")
emit('session_started', {'session_id': request.sid})
@socketio.on('end_interpreter_session')
def handle_end_session():
if request.sid in active_sessions:
logger.info(f"Ended interpreter session {request.sid}")
del active_sessions[request.sid]
emit('session_ended')
@socketio.on('audio_chunk')
def handle_audio_chunk(data):
if request.sid not in active_sessions:
emit('error', {'message': 'No active session'})
return
# Get audio data from the client
audio_data = data.get('audio')
if not audio_data:
emit('error', {'message': 'No audio data received'})
return
# Convert audio data to proper format if needed
if isinstance(audio_data, list):
# Convert list to bytes
audio_data = bytes(audio_data)
logger.info(f"Converted audio data from list to bytes: {len(audio_data)} bytes")
elif isinstance(audio_data, str):
# This might be base64 encoded data
try:
import base64
audio_data = base64.b64decode(audio_data.split(',')[1] if ',' in audio_data else audio_data)
logger.info(f"Decoded base64 audio data: {len(audio_data)} bytes")
except Exception as e:
logger.error(f"Error decoding audio data: {str(e)}")
emit('error', {'message': 'Invalid audio data format'})
return
# Get session details
source_lang = active_sessions[request.sid]['source_lang']
# Add audio chunk to processing queue
audio_processing_queue.put((audio_data, source_lang, request.sid))
emit('chunk_received')
# Initialize the audio processing queue
audio_processing_queue = None
if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5005, debug=True)
+    # Setup real-time interpreter
+    audio_processing_queue = setup_realtime_interpreter()
+    # Run with SocketIO instead of regular Flask
+    socketio.run(app, host='0.0.0.0', port=5005, debug=True)
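Commit 4bd56cbe5b's "data format conversion between client and server" corresponds to the audio_chunk handler above: Socket.IO may deliver a chunk as a list of byte values or as base64 text, and Whisper is handed a file with a .wav extension. A standalone sketch of that normalization follows; the helper name and tempfile-based path handling are assumptions, since the real handler performs the same checks inline and writes into app.config['UPLOAD_FOLDER'].

```python
import base64
import os
import tempfile


def save_audio_chunk(audio_data, upload_dir):
    """Normalize a Socket.IO audio payload to bytes and save it as a .wav file."""
    if isinstance(audio_data, list):
        # Binary payloads may arrive as a plain list of integer byte values
        audio_data = bytes(audio_data)
    elif isinstance(audio_data, str):
        # Or as base64 text, possibly prefixed like "data:audio/wav;base64,..."
        encoded = audio_data.split(',', 1)[1] if ',' in audio_data else audio_data
        audio_data = base64.b64decode(encoded)

    # Keep a .wav suffix so Whisper/ffmpeg treat the file as expected
    fd, path = tempfile.mkstemp(suffix='.wav', dir=upload_dir)
    with os.fdopen(fd, 'wb') as f:
        f.write(audio_data)
    print(f"Saved audio chunk: {len(audio_data)} bytes -> {path}")  # size logging for debugging
    return path
```

The event protocol defined above (start_interpreter_session, audio_chunk, transcription_result, translation_result, audio_ready) can also be exercised without a browser. Below is a rough smoke test using the python-socketio client package; the host, port, and sample.wav test clip are assumptions, not part of the repository.

```python
# pip install "python-socketio[client]"
import socketio

sio = socketio.Client()


@sio.on('session_started')
def on_session_started(data):
    print('session id:', data['session_id'])


@sio.on('transcription_result')
def on_transcription(data):
    print('heard:', data['text'])


@sio.on('translation_result')
def on_translation(data):
    print('translated:', data['text'])


@sio.on('audio_ready')
def on_audio(data):
    print('spoken translation at:', data['audio_url'])


sio.connect('http://localhost:5005')                    # assumed host/port
sio.emit('start_interpreter_session',
         {'source_lang': 'English', 'target_lang': 'Spanish'})

with open('sample.wav', 'rb') as f:                     # assumed test clip
    sio.emit('audio_chunk', {'audio': list(f.read())})  # list form, as the handler accepts

sio.sleep(10)                                           # wait for results to stream back
sio.emit('end_interpreter_session')
sio.disconnect()
```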

View File

@@ -3,3 +3,5 @@ requests
openai-whisper
torch
ollama
flask-socketio
eventlet

View File

@@ -8,6 +8,9 @@ document.addEventListener('DOMContentLoaded', function() {
// Initialize app
initApp();
// Initialize the real-time interpreter
initRealtimeInterpreter();
// Check for PWA installation prompts
initInstallPrompt();
});
@@ -64,11 +67,6 @@ function initApp() {
const progressContainer = document.getElementById('progressContainer');
const progressBar = document.getElementById('progressBar');
const audioPlayer = document.getElementById('audioPlayer');
const ttsServerAlert = document.getElementById('ttsServerAlert');
const ttsServerMessage = document.getElementById('ttsServerMessage');
const ttsServerUrl = document.getElementById('ttsServerUrl');
const ttsApiKey = document.getElementById('ttsApiKey');
const updateTtsServer = document.getElementById('updateTtsServer');
// Set initial values
let isRecording = false;
@@ -76,52 +74,6 @@ function initApp() {
let audioChunks = [];
let currentSourceText = '';
let currentTranslationText = '';
let currentTtsServerUrl = '';
// Check TTS server status on page load
checkTtsServer();
// Check for saved translations in IndexedDB
loadSavedTranslations();
// Update TTS server URL and API key
updateTtsServer.addEventListener('click', function() {
const newUrl = ttsServerUrl.value.trim();
const newApiKey = ttsApiKey.value.trim();
if (!newUrl && !newApiKey) {
alert('Please provide at least one value to update');
return;
}
const updateData = {};
if (newUrl) updateData.server_url = newUrl;
if (newApiKey) updateData.api_key = newApiKey;
fetch('/update_tts_config', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(updateData)
})
.then(response => response.json())
.then(data => {
if (data.success) {
statusIndicator.textContent = 'TTS configuration updated';
// Save URL to localStorage but not the API key for security
if (newUrl) localStorage.setItem('ttsServerUrl', newUrl);
// Check TTS server with new configuration
checkTtsServer();
} else {
alert('Failed to update TTS configuration: ' + data.error);
}
})
.catch(error => {
console.error('Failed to update TTS config:', error);
alert('Failed to update TTS configuration. See console for details.');
});
});
// Make sure target language is different from source
if (targetLanguage.options[0].value === sourceLanguage.value) {
@@ -337,29 +289,13 @@ function initApp() {
audioPlayer.play();
} else {
statusIndicator.textContent = 'TTS failed';
// Show TTS server alert with error message
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = data.error;
alert('Failed to play audio: ' + data.error);
// Check TTS server status again
checkTtsServer();
}
})
.catch(error => {
hideProgress();
console.error('TTS error:', error);
statusIndicator.textContent = 'TTS failed';
// Show TTS server alert
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = 'Failed to connect to TTS server';
});
}
@@ -377,48 +313,6 @@ function initApp() {
playTranslation.disabled = true;
});
// Function to check TTS server status
function checkTtsServer() {
fetch('/check_tts_server')
.then(response => response.json())
.then(data => {
currentTtsServerUrl = data.url;
ttsServerUrl.value = currentTtsServerUrl;
// Load saved API key if available
const savedApiKey = localStorage.getItem('ttsApiKeySet');
if (savedApiKey === 'true') {
ttsApiKey.placeholder = '••••••• (API key saved)';
}
if (data.status === 'error' || data.status === 'auth_error') {
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = data.message;
if (data.status === 'auth_error') {
ttsServerMessage.textContent = 'Authentication error with TTS server. Please check your API key.';
}
} else {
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-warning');
ttsServerAlert.classList.add('alert-success');
ttsServerMessage.textContent = 'TTS server is online and ready.';
setTimeout(() => {
ttsServerAlert.classList.add('d-none');
}, 3000);
}
})
.catch(error => {
console.error('Failed to check TTS server:', error);
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = 'Failed to check TTS server status.';
});
}
// Progress indicator functions
function showProgress() {
progressContainer.classList.remove('d-none');
@@ -444,6 +338,376 @@ function initApp() {
progressBar.style.width = '0%';
}, 500);
}
// Check TTS server status on page load if the alert element exists
const ttsServerAlert = document.getElementById('ttsServerAlert');
if (ttsServerAlert) {
checkTtsServer();
}
// Function to check TTS server status
function checkTtsServer() {
const ttsServerMessage = document.getElementById('ttsServerMessage');
const ttsServerUrl = document.getElementById('ttsServerUrl');
fetch('/check_tts_server')
.then(response => response.json())
.then(data => {
let currentTtsServerUrl = data.url;
if (ttsServerUrl) ttsServerUrl.value = currentTtsServerUrl;
// Load saved API key if available
const savedApiKey = localStorage.getItem('ttsApiKeySet');
const ttsApiKey = document.getElementById('ttsApiKey');
if (ttsApiKey && savedApiKey === 'true') {
ttsApiKey.placeholder = '••••••• (API key saved)';
}
if (ttsServerAlert && ttsServerMessage) {
if (data.status === 'error' || data.status === 'auth_error') {
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = data.message;
if (data.status === 'auth_error') {
ttsServerMessage.textContent = 'Authentication error with TTS server. Please check your API key.';
}
} else {
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-warning');
ttsServerAlert.classList.add('alert-success');
ttsServerMessage.textContent = 'TTS server is online and ready.';
setTimeout(() => {
ttsServerAlert.classList.add('d-none');
}, 3000);
}
}
})
.catch(error => {
console.error('Failed to check TTS server:', error);
if (ttsServerAlert && ttsServerMessage) {
ttsServerAlert.classList.remove('d-none');
ttsServerAlert.classList.remove('alert-success');
ttsServerAlert.classList.add('alert-warning');
ttsServerMessage.textContent = 'Failed to check TTS server status.';
}
});
}
}
// Real-time interpreter module
function initRealtimeInterpreter() {
// DOM elements
const realtimeBtn = document.getElementById('realtimeBtn');
const realtimeStatusIndicator = document.getElementById('realtimeStatusIndicator');
const sourceLanguage = document.getElementById('sourceLanguage');
const targetLanguage = document.getElementById('targetLanguage');
const sourceText = document.getElementById('sourceText');
const translatedText = document.getElementById('translatedText');
const audioPlayer = document.getElementById('audioPlayer');
// SocketIO connection
let socket = null;
let mediaRecorder = null;
let audioContext = null;
let isInterpreting = false;
let sessionId = null;
// Audio processing variables
const bufferSize = 16384; // Increased from 4096 for better speech capture
let audioProcessor = null;
let micStream = null;
// Initialize the audio context
function initAudioContext() {
try {
window.AudioContext = window.AudioContext || window.webkitAudioContext;
audioContext = new AudioContext();
return true;
} catch (e) {
console.error('Web Audio API is not supported in this browser', e);
return false;
}
}
// Connect to Socket.IO server
function connectSocket() {
if (socket) {
return; // Already connected
}
// Connect to the same host where the Flask app is running
socket = io.connect(window.location.origin, {
forceNew: true
});
// Socket event handlers
socket.on('connect', () => {
console.log('Socket connected');
});
socket.on('disconnect', () => {
console.log('Socket disconnected');
stopInterpreting();
});
socket.on('error', (data) => {
console.error('Socket error:', data.message);
realtimeStatusIndicator.textContent = `Error: ${data.message}`;
});
socket.on('session_started', (data) => {
sessionId = data.session_id;
console.log('Interpreter session started:', sessionId);
realtimeStatusIndicator.textContent = 'Interpreter active - listening...';
});
socket.on('session_ended', () => {
console.log('Interpreter session ended');
sessionId = null;
realtimeStatusIndicator.textContent = 'Interpreter stopped';
});
socket.on('chunk_received', () => {
// This is a confirmation that the server received our audio chunk
// We can use this to update UI if needed
});
socket.on('transcription_result', (data) => {
if (data.session_id === sessionId) {
// Update source text with transcription
if (sourceText.querySelector('p.text-muted')) {
sourceText.innerHTML = ''; // Clear placeholder
}
const p = document.createElement('p');
p.textContent = data.text;
sourceText.appendChild(p);
// Auto-scroll to bottom
sourceText.scrollTop = sourceText.scrollHeight;
}
});
socket.on('translation_result', (data) => {
if (data.session_id === sessionId) {
// Update translated text
if (translatedText.querySelector('p.text-muted')) {
translatedText.innerHTML = ''; // Clear placeholder
}
const p = document.createElement('p');
p.textContent = data.text;
translatedText.appendChild(p);
// Auto-scroll to bottom
translatedText.scrollTop = translatedText.scrollHeight;
}
});
socket.on('audio_ready', (data) => {
if (data.session_id === sessionId) {
// Play the translated audio
audioPlayer.src = data.audio_url;
audioPlayer.play();
}
});
}
// Start the real-time interpreter
function startInterpreting() {
if (!initAudioContext()) {
alert('Your browser does not support the Web Audio API required for the real-time interpreter.');
return;
}
connectSocket();
// Request microphone access
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
.then((stream) => {
micStream = stream;
// Start a new interpreter session
socket.emit('start_interpreter_session', {
source_lang: sourceLanguage.value,
target_lang: targetLanguage.value
});
// Setup audio processing
const audioInput = audioContext.createMediaStreamSource(stream);
audioProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
// Connect the audio processing node
audioInput.connect(audioProcessor);
audioProcessor.connect(audioContext.destination);
// Process audio data
let silenceStart = performance.now();
let isSilent = true;
const silenceThreshold = 0.005; // Lower threshold to be more sensitive to speech
const silenceDelay = 2000; // 2 seconds of silence before stopping
audioProcessor.onaudioprocess = function(e) {
if (!isInterpreting) return;
// Get audio data
const inputData = e.inputBuffer.getChannelData(0);
// Check for silence
let sum = 0;
for (let i = 0; i < inputData.length; i++) {
sum += Math.abs(inputData[i]);
}
const average = sum / inputData.length;
// Detect speech vs silence
if (average > silenceThreshold) {
if (isSilent) {
isSilent = false;
realtimeStatusIndicator.textContent = 'Interpreting...';
}
silenceStart = performance.now(); // Reset silence timer
} else if (!isSilent && (performance.now() - silenceStart > silenceDelay)) {
isSilent = true;
realtimeStatusIndicator.textContent = 'Waiting for speech...';
}
// Convert buffer to WAV format
const wavBuffer = convertToWav(inputData);
// Send to server if not silent or within silence delay
if (!isSilent || (performance.now() - silenceStart <= silenceDelay)) {
socket.emit('audio_chunk', {
audio: wavBuffer
});
console.log(`Sent audio chunk: ${wavBuffer.length} bytes`);
}
};
// Update UI
isInterpreting = true;
realtimeBtn.textContent = 'Stop Interpreter';
realtimeBtn.classList.replace('btn-primary', 'btn-danger');
realtimeStatusIndicator.textContent = 'Interpreter active - listening...';
// Disable language selectors during interpretation
sourceLanguage.disabled = true;
targetLanguage.disabled = true;
})
.catch((error) => {
console.error('Error accessing microphone:', error);
realtimeStatusIndicator.textContent = 'Error: Microphone access denied';
alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
});
}
// Stop the real-time interpreter
function stopInterpreting() {
if (!isInterpreting) return;
// Stop audio processing
if (audioProcessor) {
audioProcessor.disconnect();
audioProcessor = null;
}
// Stop microphone stream
if (micStream) {
micStream.getTracks().forEach(track => track.stop());
micStream = null;
}
// End the interpreter session
if (socket && socket.connected) {
socket.emit('end_interpreter_session');
}
// Update UI
isInterpreting = false;
realtimeBtn.textContent = 'Start Interpreter';
realtimeBtn.classList.replace('btn-danger', 'btn-primary');
realtimeStatusIndicator.textContent = 'Interpreter ready';
// Re-enable language selectors
sourceLanguage.disabled = false;
targetLanguage.disabled = false;
}
// Convert audio buffer to WAV format
function convertToWav(audioBuffer) {
const sampleRate = audioContext.sampleRate;
const numberOfChannels = 1;
const bitsPerSample = 16;
// Log audio properties for debugging
console.log(`Recording audio: ${sampleRate}Hz, ${numberOfChannels} channel(s), ${audioBuffer.length} samples`);
// Calculate sizes and create ArrayBuffers
const dataSize = audioBuffer.length * 2; // 16-bit samples
const totalSize = 44 + dataSize; // 44 bytes for header + data size
const buffer = new ArrayBuffer(totalSize);
const view = new DataView(buffer);
// Write WAV header
writeString(view, 0, 'RIFF');
view.setUint32(4, 36 + dataSize, true); // File size - 8
writeString(view, 8, 'WAVE');
writeString(view, 12, 'fmt ');
view.setUint32(16, 16, true); // Format chunk length
view.setUint16(20, 1, true); // PCM format
view.setUint16(22, numberOfChannels, true);
view.setUint32(24, sampleRate, true);
view.setUint32(28, sampleRate * numberOfChannels * (bitsPerSample / 8), true); // Byte rate
view.setUint16(32, numberOfChannels * (bitsPerSample / 8), true); // Block align
view.setUint16(34, bitsPerSample, true);
writeString(view, 36, 'data');
view.setUint32(40, dataSize, true);
// Write audio data
let offset = 44;
for (let i = 0; i < audioBuffer.length; i++) {
// Convert float to int16
const s = Math.max(-1, Math.min(1, audioBuffer[i]));
const val = s < 0 ? s * 0x8000 : s * 0x7FFF;
view.setInt16(offset, val, true);
offset += 2;
}
// Helper function to write strings to DataView
function writeString(view, offset, string) {
for (let i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i));
}
}
// Log the created buffer size
console.log(`Created WAV buffer: ${buffer.byteLength} bytes`);
return new Uint8Array(buffer);
}
// Toggle interpreter on button click
realtimeBtn.addEventListener('click', function() {
if (isInterpreting) {
stopInterpreting();
} else {
startInterpreting();
}
});
// Cleanup function for when the page is unloaded
window.addEventListener('beforeunload', function() {
stopInterpreting();
if (socket) {
socket.disconnect();
}
});
// Initialize status
realtimeStatusIndicator.textContent = 'Interpreter ready';
}
// IndexedDB functions for offline data storage
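Commit 378f976b76 tunes the client-side speech gate in the onaudioprocess handler above: a mean absolute amplitude below 0.005 counts as silence, and chunks keep streaming until 2 seconds of silence have elapsed. The shipped code is JavaScript; the same gating logic is sketched here in Python purely for illustration, with the threshold and timeout taken from the diff.

```python
import time

SILENCE_THRESHOLD = 0.005   # mean absolute amplitude below this counts as silence
SILENCE_TIMEOUT = 2.0       # seconds of silence before the gate closes


def make_speech_gate(threshold=SILENCE_THRESHOLD, timeout=SILENCE_TIMEOUT):
    """Return should_send(samples), deciding whether a chunk should be streamed."""
    state = {'silent': True, 'last_loud': time.monotonic()}

    def should_send(samples):
        # samples: floats in [-1.0, 1.0], like a Web Audio channel buffer
        level = sum(abs(s) for s in samples) / max(len(samples), 1)
        now = time.monotonic()
        if level > threshold:
            state['silent'] = False
            state['last_loud'] = now          # reset the silence timer
        elif not state['silent'] and now - state['last_loud'] > timeout:
            state['silent'] = True
        # Keep streaming while speech is active or the timeout has not elapsed
        return (not state['silent']) or (now - state['last_loud'] <= timeout)

    return should_send


gate = make_speech_gate()
print(gate([0.02] * 1024))   # loud chunk -> True
print(gate([0.0] * 1024))    # quiet chunk just after speech -> still True
```

Similarly, convertToWav above assembles the 44-byte RIFF/WAVE header by hand. For comparison, or for generating test clips server-side, the Python standard library produces the same 16-bit mono PCM container; the 48000 Hz rate is an assumption (the browser uses whatever its AudioContext reports).

```python
import struct
import wave


def float_samples_to_wav(samples, path, sample_rate=48000):
    """Write float samples in [-1.0, 1.0] as 16-bit mono PCM, like convertToWav."""
    frames = bytearray()
    for s in samples:
        s = max(-1.0, min(1.0, s))
        # Same float -> int16 mapping as the JavaScript code above
        frames += struct.pack('<h', int(s * 0x8000) if s < 0 else int(s * 0x7FFF))

    with wave.open(path, 'wb') as wav:
        wav.setnchannels(1)            # mono, matching numberOfChannels = 1
        wav.setsampwidth(2)            # 16 bits per sample
        wav.setframerate(sample_rate)
        wav.writeframes(bytes(frames))  # the wave module writes the RIFF header


float_samples_to_wav([0.0] * 24000, 'silence_half_second.wav')
```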

View File

@@ -11,6 +11,8 @@
<link rel="apple-touch-icon" sizes="152x152" href="/static/icons/apple-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/static/icons/apple-icon-180x180.png">
<link rel="apple-touch-icon" sizes="167x167" href="/static/icons/apple-icon-167x167.png">
<!-- Add Socket.IO client library -->
<script src="https://cdn.socket.io/4.6.0/socket.io.min.js"></script>
<style>
body {
padding-top: 20px;
@@ -74,6 +76,8 @@
background-color: #f8f9fa;
border-radius: 10px;
margin-bottom: 15px;
overflow-y: auto;
max-height: 300px;
}
.btn-action {
border-radius: 10px;
@@ -191,290 +195,48 @@
</button>
</div>
<!-- Add real-time interpreter button -->
<div class="text-center mt-3">
<button id="realtimeBtn" class="btn btn-primary">
<i class="fas fa-language"></i> Start Interpreter
</button>
<p class="status-indicator" id="realtimeStatusIndicator">Interpreter ready</p>
</div>
<div class="mt-3"> <div class="mt-3">
<div class="progress d-none" id="progressContainer"> <div class="progress d-none" id="progressContainer">
<div id="progressBar" class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" style="width: 0%"></div> <div id="progressBar" class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" style="width: 0%"></div>
</div> </div>
</div> </div>
<!-- Add explanation of real-time interpreter mode -->
<div class="row mt-4">
<div class="col-md-12">
<div class="card">
<div class="card-header bg-info text-white">
<h5 class="mb-0">Real-time Interpretation Mode</h5>
</div>
<div class="card-body">
<p>This mode enables continuous, real-time interpretation as you speak:</p>
<ul>
<li><strong>Start Interpreter</strong> - Begin continuous speech interpretation</li>
<li><strong>Stop Interpreter</strong> - End the real-time interpretation session</li>
</ul>
<p class="text-muted">
<small>
The interpreter will automatically detect speech, transcribe, translate, and speak the translation with minimal delay.
Pause detection automatically handles natural breaks in conversation.
</small>
</p>
</div>
</div>
</div>
</div>
<audio id="audioPlayer" style="display: none;"></audio> <audio id="audioPlayer" style="display: none;"></audio>
</div> </div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
// DOM elements
const recordBtn = document.getElementById('recordBtn');
const translateBtn = document.getElementById('translateBtn');
const sourceText = document.getElementById('sourceText');
const translatedText = document.getElementById('translatedText');
const sourceLanguage = document.getElementById('sourceLanguage');
const targetLanguage = document.getElementById('targetLanguage');
const playSource = document.getElementById('playSource');
const playTranslation = document.getElementById('playTranslation');
const clearSource = document.getElementById('clearSource');
const clearTranslation = document.getElementById('clearTranslation');
const statusIndicator = document.getElementById('statusIndicator');
const progressContainer = document.getElementById('progressContainer');
const progressBar = document.getElementById('progressBar');
const audioPlayer = document.getElementById('audioPlayer');
// Set initial values
let isRecording = false;
let mediaRecorder = null;
let audioChunks = [];
let currentSourceText = '';
let currentTranslationText = '';
// Make sure target language is different from source
if (targetLanguage.options[0].value === sourceLanguage.value) {
targetLanguage.selectedIndex = 1;
}
// Event listeners for language selection
sourceLanguage.addEventListener('change', function() {
if (targetLanguage.value === sourceLanguage.value) {
for (let i = 0; i < targetLanguage.options.length; i++) {
if (targetLanguage.options[i].value !== sourceLanguage.value) {
targetLanguage.selectedIndex = i;
break;
}
}
}
});
targetLanguage.addEventListener('change', function() {
if (targetLanguage.value === sourceLanguage.value) {
for (let i = 0; i < sourceLanguage.options.length; i++) {
if (sourceLanguage.options[i].value !== targetLanguage.value) {
sourceLanguage.selectedIndex = i;
break;
}
}
}
});
// Record button click event
recordBtn.addEventListener('click', function() {
if (isRecording) {
stopRecording();
} else {
startRecording();
}
});
// Function to start recording
function startRecording() {
navigator.mediaDevices.getUserMedia({ audio: true })
.then(stream => {
mediaRecorder = new MediaRecorder(stream);
audioChunks = [];
mediaRecorder.addEventListener('dataavailable', event => {
audioChunks.push(event.data);
});
mediaRecorder.addEventListener('stop', () => {
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
transcribeAudio(audioBlob);
});
mediaRecorder.start();
isRecording = true;
recordBtn.classList.add('recording');
recordBtn.classList.replace('btn-primary', 'btn-danger');
recordBtn.innerHTML = '<i class="fas fa-stop"></i>';
statusIndicator.textContent = 'Recording... Click to stop';
})
.catch(error => {
console.error('Error accessing microphone:', error);
alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
});
}
// Function to stop recording
function stopRecording() {
mediaRecorder.stop();
isRecording = false;
recordBtn.classList.remove('recording');
recordBtn.classList.replace('btn-danger', 'btn-primary');
recordBtn.innerHTML = '<i class="fas fa-microphone"></i>';
statusIndicator.textContent = 'Processing audio...';
// Stop all audio tracks
mediaRecorder.stream.getTracks().forEach(track => track.stop());
}
// Function to transcribe audio
function transcribeAudio(audioBlob) {
const formData = new FormData();
formData.append('audio', audioBlob);
formData.append('source_lang', sourceLanguage.value);
showProgress();
fetch('/transcribe', {
method: 'POST',
body: formData
})
.then(response => response.json())
.then(data => {
hideProgress();
if (data.success) {
currentSourceText = data.text;
sourceText.innerHTML = `<p>${data.text}</p>`;
playSource.disabled = false;
translateBtn.disabled = false;
statusIndicator.textContent = 'Transcription complete';
} else {
sourceText.innerHTML = `<p class="text-danger">Error: ${data.error}</p>`;
statusIndicator.textContent = 'Transcription failed';
}
})
.catch(error => {
hideProgress();
console.error('Transcription error:', error);
sourceText.innerHTML = `<p class="text-danger">Failed to transcribe audio. Please try again.</p>`;
statusIndicator.textContent = 'Transcription failed';
});
}
// Translate button click event
translateBtn.addEventListener('click', function() {
if (!currentSourceText) {
return;
}
statusIndicator.textContent = 'Translating...';
showProgress();
fetch('/translate', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
text: currentSourceText,
source_lang: sourceLanguage.value,
target_lang: targetLanguage.value
})
})
.then(response => response.json())
.then(data => {
hideProgress();
if (data.success) {
currentTranslationText = data.translation;
translatedText.innerHTML = `<p>${data.translation}</p>`;
playTranslation.disabled = false;
statusIndicator.textContent = 'Translation complete';
} else {
translatedText.innerHTML = `<p class="text-danger">Error: ${data.error}</p>`;
statusIndicator.textContent = 'Translation failed';
}
})
.catch(error => {
hideProgress();
console.error('Translation error:', error);
translatedText.innerHTML = `<p class="text-danger">Failed to translate. Please try again.</p>`;
statusIndicator.textContent = 'Translation failed';
});
});
// Play source text
playSource.addEventListener('click', function() {
if (!currentSourceText) return;
playAudio(currentSourceText, sourceLanguage.value);
statusIndicator.textContent = 'Playing source audio...';
});
// Play translation
playTranslation.addEventListener('click', function() {
if (!currentTranslationText) return;
playAudio(currentTranslationText, targetLanguage.value);
statusIndicator.textContent = 'Playing translation audio...';
});
// Function to play audio via TTS
function playAudio(text, language) {
showProgress();
fetch('/speak', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
text: text,
language: language
})
})
.then(response => response.json())
.then(data => {
hideProgress();
if (data.success) {
audioPlayer.src = data.audio_url;
audioPlayer.onended = function() {
statusIndicator.textContent = 'Ready';
};
audioPlayer.play();
} else {
statusIndicator.textContent = 'TTS failed';
alert('Failed to play audio: ' + data.error);
}
})
.catch(error => {
hideProgress();
console.error('TTS error:', error);
statusIndicator.textContent = 'TTS failed';
});
}
// Clear buttons
clearSource.addEventListener('click', function() {
sourceText.innerHTML = '<p class="text-muted">Your transcribed text will appear here...</p>';
currentSourceText = '';
playSource.disabled = true;
translateBtn.disabled = true;
});
clearTranslation.addEventListener('click', function() {
translatedText.innerHTML = '<p class="text-muted">Translation will appear here...</p>';
currentTranslationText = '';
playTranslation.disabled = true;
});
// Progress indicator functions
function showProgress() {
progressContainer.classList.remove('d-none');
let progress = 0;
const interval = setInterval(() => {
progress += 5;
if (progress > 90) {
clearInterval(interval);
}
progressBar.style.width = `${progress}%`;
}, 100);
progressBar.dataset.interval = interval;
}
function hideProgress() {
const interval = progressBar.dataset.interval;
if (interval) {
clearInterval(Number(interval));
}
progressBar.style.width = '100%';
setTimeout(() => {
progressContainer.classList.add('d-none');
progressBar.style.width = '0%';
}, 500);
}
});
</script>
<script src="/static/js/app.js"></script> <script src="/static/js/app.js"></script>
</body> </body>
</html> </html>