Compare commits: main...realtime-i (5 commits)

4bd56cbe5b
378f976b76
ae8eaaacc0
d46df7939a
5a7f5f04ad

app.py (280 lines changed)
@@ -4,7 +4,10 @@ import tempfile
import requests
import json
import logging
import threading
import queue
from flask import Flask, render_template, request, jsonify, Response, send_file, send_from_directory
from flask_socketio import SocketIO, emit
import whisper
import torch
import ollama

@@ -18,6 +21,9 @@ app.config['UPLOAD_FOLDER'] = tempfile.mkdtemp()
app.config['TTS_SERVER'] = os.environ.get('TTS_SERVER_URL', 'http://localhost:5050/v1/audio/speech')
app.config['TTS_API_KEY'] = os.environ.get('TTS_API_KEY', '56461d8b44607f2cfcb8030dee313a8e')
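# Note: the second argument above is a hardcoded fallback key, only useful for
# local development; deployments should supply TTS_API_KEY via the environment.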

# Initialize Flask-SocketIO
socketio = SocketIO(app, cors_allowed_origins="*")

@app.route('/<path:filename>')
def root_files(filename):
    # Check if requested file is one of the common icon filenames

@@ -178,7 +184,7 @@ def update_tts_config():

# Load Whisper model
logger.info("Loading Whisper model...")
whisper_model = whisper.load_model("medium")
whisper_model = whisper.load_model("base")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
whisper_model = whisper_model.to(device)
logger.info("Whisper model loaded successfully")

@@ -393,5 +399,275 @@ def get_audio(filename):
        logger.error(f"Audio retrieval error: {str(e)}")
        return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500

# Real-time interpreter functions
def setup_realtime_interpreter():
    global whisper_model
    logger.info("Setting up realtime interpreter...")

    # Create processing queues
    audio_queue = queue.Queue()
    transcription_queue = queue.Queue()
    translation_queue = queue.Queue()
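    # The three queues chain the pipeline stages together: raw audio ->
    # transcription -> translation -> TTS, each drained by its own worker thread.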

    # Audio processing thread function
    def process_audio_chunks():
        while True:
            try:
                audio_data, source_lang, session_id = audio_queue.get(timeout=1)
                if audio_data is None:  # Poison pill to stop thread
                    break

                # Log received audio chunk size
                logger.info(f"Processing audio chunk: {len(audio_data)} bytes for session {session_id}")

                # Save audio chunk to a temporary file
                temp_path = os.path.join(app.config['UPLOAD_FOLDER'], f'chunk_{session_id}_{int(time.time())}.wav')
                with open(temp_path, 'wb') as f:
                    f.write(audio_data)

                logger.info(f"Saved audio chunk to {temp_path}, starting transcription")

                # Use whisper to transcribe
                try:
                    # Check if file is valid
                    file_size = os.path.getsize(temp_path)
                    logger.info(f"File size: {file_size} bytes")

                    # Enforce .wav extension for compatibility
                    new_temp_path = temp_path
                    if not temp_path.lower().endswith('.wav'):
                        new_temp_path = temp_path + '.wav'
                        os.rename(temp_path, new_temp_path)
                        logger.info(f"Renamed audio file to ensure .wav extension: {new_temp_path}")
                    temp_path = new_temp_path

                    result = whisper_model.transcribe(
                        temp_path,
                        language=LANGUAGE_TO_CODE.get(source_lang, None)
                    )
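
                    # LANGUAGE_TO_CODE is expected to map a display name like "English"
                    # to a Whisper language code; None lets Whisper auto-detect the language.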

                    # Log the result length to see if we got anything
                    text_result = result["text"].strip()
                    logger.info(f"Transcription result: {len(text_result)} characters")

                    # If we got a meaningful result, send it for translation
                    if text_result:
                        logger.info(f"Transcription: '{text_result}'")
                        transcription_queue.put((text_result, source_lang, session_id))
                        # Emit transcription result to client
                        socketio.emit('transcription_result', {
                            'text': text_result,
                            'session_id': session_id
                        })
                    else:
                        logger.info("No transcription text detected in the audio")
                except Exception as e:
                    logger.error(f"Error transcribing audio chunk: {str(e)}")

                # Clean up temp file
                if os.path.exists(temp_path):
                    os.remove(temp_path)

                audio_queue.task_done()
            except queue.Empty:
                continue
            except Exception as e:
                logger.error(f"Error in audio processing thread: {str(e)}")

    # Translation processing thread function
    def process_transcriptions():
        while True:
            try:
                text, source_lang, session_id = transcription_queue.get(timeout=1)
                if text is None:  # Poison pill to stop thread
                    break

                # Get the target language from the active sessions
                target_lang = active_sessions.get(session_id, {}).get('target_lang', 'English')

                # Create a prompt for Gemma 3 translation
                prompt = f"""
Translate the following text from {source_lang} to {target_lang}:

"{text}"

Provide only the translation without any additional text.
"""

                # Use Ollama to interact with Gemma 3
                try:
                    response = ollama.chat(
                        model="gemma3:27b",
                        messages=[
                            {
                                "role": "user",
                                "content": prompt
                            }
                        ]
                    )
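
                    # ollama.chat() here returns only after the model has produced the
                    # full reply (no streaming), so per-chunk latency scales with model size.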

                    translated_text = response['message']['content'].strip()
                    translation_queue.put((translated_text, target_lang, session_id))

                    # Emit translation result to client
                    socketio.emit('translation_result', {
                        'text': translated_text,
                        'session_id': session_id
                    })
                except Exception as e:
                    logger.error(f"Error translating text: {str(e)}")

                transcription_queue.task_done()
            except queue.Empty:
                continue
            except Exception as e:
                logger.error(f"Error in translation thread: {str(e)}")

    # TTS processing thread function
    def process_translations():
        while True:
            try:
                text, language, session_id = translation_queue.get(timeout=1)
                if text is None:  # Poison pill to stop thread
                    break

                voice = LANGUAGE_TO_VOICE.get(language, 'echo')

                # Get TTS server URL and API key from config
                tts_server_url = app.config['TTS_SERVER']
                tts_api_key = app.config['TTS_API_KEY']

                # Request TTS from the OpenAI Edge TTS server
                try:
                    headers = {
                        "Content-Type": "application/json",
                        "Authorization": f"Bearer {tts_api_key}"
                    }

                    # Proper OpenAI TTS payload
                    payload = {
                        "input": text,
                        "voice": voice,
                        "response_format": "mp3",
                        "speed": 1.0
                    }

                    tts_response = requests.post(
                        tts_server_url,
                        headers=headers,
                        json=payload,
                        timeout=15
                    )

                    if tts_response.status_code == 200:
                        # Save audio temporarily
                        temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], f'output_{session_id}_{int(time.time())}.mp3')
                        with open(temp_audio_path, 'wb') as f:
                            f.write(tts_response.content)

                        # Emit audio URL to client
                        audio_url = f'/get_audio/{os.path.basename(temp_audio_path)}'
                        socketio.emit('audio_ready', {
                            'audio_url': audio_url,
                            'session_id': session_id
                        })
                except Exception as e:
                    logger.error(f"Error generating TTS: {str(e)}")

                translation_queue.task_done()
            except queue.Empty:
                continue
            except Exception as e:
                logger.error(f"Error in TTS thread: {str(e)}")

    # Start worker threads
    threading.Thread(target=process_audio_chunks, daemon=True).start()
    threading.Thread(target=process_transcriptions, daemon=True).start()
    threading.Thread(target=process_translations, daemon=True).start()
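    # daemon=True means these workers are killed with the main process;
    # anything still sitting in the queues at shutdown is dropped.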

    logger.info("Realtime interpreter threads started")

    return audio_queue

# Dictionary to store active interpretation sessions
active_sessions = {}

# Socket.IO event handlers
@socketio.on('connect')
def handle_connect():
    logger.info(f"Client connected: {request.sid}")

@socketio.on('disconnect')
def handle_disconnect():
    logger.info(f"Client disconnected: {request.sid}")
    # Clean up any session data
    if request.sid in active_sessions:
        del active_sessions[request.sid]

@socketio.on('start_interpreter_session')
def handle_start_session(data):
    source_lang = data.get('source_lang', 'English')
    target_lang = data.get('target_lang', 'Spanish')

    # Store session info
    active_sessions[request.sid] = {
        'source_lang': source_lang,
        'target_lang': target_lang,
        'start_time': time.time()
    }

    logger.info(f"Started interpreter session {request.sid}: {source_lang} -> {target_lang}")
    emit('session_started', {'session_id': request.sid})

@socketio.on('end_interpreter_session')
def handle_end_session():
    if request.sid in active_sessions:
        logger.info(f"Ended interpreter session {request.sid}")
        del active_sessions[request.sid]
        emit('session_ended')

@socketio.on('audio_chunk')
def handle_audio_chunk(data):
    if request.sid not in active_sessions:
        emit('error', {'message': 'No active session'})
        return

    # Get audio data from the client
    audio_data = data.get('audio')
    if not audio_data:
        emit('error', {'message': 'No audio data received'})
        return

    # Convert audio data to proper format if needed
    if isinstance(audio_data, list):
        # Convert list to bytes
        audio_data = bytes(audio_data)
        logger.info(f"Converted audio data from list to bytes: {len(audio_data)} bytes")
    elif isinstance(audio_data, str):
        # This might be base64 encoded data
        try:
            import base64
            audio_data = base64.b64decode(audio_data.split(',')[1] if ',' in audio_data else audio_data)
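            # Browsers may send a data URL ("data:audio/wav;base64,...");
            # splitting on ',' strips that prefix before decoding.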
            logger.info(f"Decoded base64 audio data: {len(audio_data)} bytes")
        except Exception as e:
            logger.error(f"Error decoding audio data: {str(e)}")
            emit('error', {'message': 'Invalid audio data format'})
            return

    # Get session details
    source_lang = active_sessions[request.sid]['source_lang']

    # Add audio chunk to processing queue
    audio_processing_queue.put((audio_data, source_lang, request.sid))

    emit('chunk_received')

# Initialize the audio processing queue
audio_processing_queue = None

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5005, debug=True)
    # Setup real-time interpreter
    audio_processing_queue = setup_realtime_interpreter()

    # Run with SocketIO instead of regular Flask
    socketio.run(app, host='0.0.0.0', port=5005, debug=True)
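    # socketio.run() replaces app.run() so WebSocket traffic is served by an
    # async-capable server (eventlet, newly added to the requirements below).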

requirements.txt

@@ -3,3 +3,5 @@ requests
openai-whisper
torch
ollama
flask-socketio
eventlet

static/js/app.js (482 lines changed)
@@ -8,6 +8,9 @@ document.addEventListener('DOMContentLoaded', function() {
    // Initialize app
    initApp();

    // Initialize the real-time interpreter
    initRealtimeInterpreter();

    // Check for PWA installation prompts
    initInstallPrompt();
});

@@ -64,11 +67,6 @@ function initApp() {
    const progressContainer = document.getElementById('progressContainer');
    const progressBar = document.getElementById('progressBar');
    const audioPlayer = document.getElementById('audioPlayer');
    const ttsServerAlert = document.getElementById('ttsServerAlert');
    const ttsServerMessage = document.getElementById('ttsServerMessage');
    const ttsServerUrl = document.getElementById('ttsServerUrl');
    const ttsApiKey = document.getElementById('ttsApiKey');
    const updateTtsServer = document.getElementById('updateTtsServer');

    // Set initial values
    let isRecording = false;

@@ -76,52 +74,6 @@ function initApp() {
    let audioChunks = [];
    let currentSourceText = '';
    let currentTranslationText = '';
    let currentTtsServerUrl = '';

    // Check TTS server status on page load
    checkTtsServer();

    // Check for saved translations in IndexedDB
    loadSavedTranslations();

    // Update TTS server URL and API key
    updateTtsServer.addEventListener('click', function() {
        const newUrl = ttsServerUrl.value.trim();
        const newApiKey = ttsApiKey.value.trim();

        if (!newUrl && !newApiKey) {
            alert('Please provide at least one value to update');
            return;
        }

        const updateData = {};
        if (newUrl) updateData.server_url = newUrl;
        if (newApiKey) updateData.api_key = newApiKey;

        fetch('/update_tts_config', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(updateData)
        })
        .then(response => response.json())
        .then(data => {
            if (data.success) {
                statusIndicator.textContent = 'TTS configuration updated';
                // Save URL to localStorage but not the API key for security
                if (newUrl) localStorage.setItem('ttsServerUrl', newUrl);
                // Check TTS server with new configuration
                checkTtsServer();
            } else {
                alert('Failed to update TTS configuration: ' + data.error);
            }
        })
        .catch(error => {
            console.error('Failed to update TTS config:', error);
            alert('Failed to update TTS configuration. See console for details.');
        });
    });

    // Make sure target language is different from source
    if (targetLanguage.options[0].value === sourceLanguage.value) {

@@ -337,29 +289,13 @@ function initApp() {
            audioPlayer.play();
        } else {
            statusIndicator.textContent = 'TTS failed';

            // Show TTS server alert with error message
            ttsServerAlert.classList.remove('d-none');
            ttsServerAlert.classList.remove('alert-success');
            ttsServerAlert.classList.add('alert-warning');
            ttsServerMessage.textContent = data.error;

            alert('Failed to play audio: ' + data.error);

            // Check TTS server status again
            checkTtsServer();
        }
    })
    .catch(error => {
        hideProgress();
        console.error('TTS error:', error);
        statusIndicator.textContent = 'TTS failed';

        // Show TTS server alert
        ttsServerAlert.classList.remove('d-none');
        ttsServerAlert.classList.remove('alert-success');
        ttsServerAlert.classList.add('alert-warning');
        ttsServerMessage.textContent = 'Failed to connect to TTS server';
    });
}

@@ -377,48 +313,6 @@ function initApp() {
        playTranslation.disabled = true;
    });

    // Function to check TTS server status
    function checkTtsServer() {
        fetch('/check_tts_server')
        .then(response => response.json())
        .then(data => {
            currentTtsServerUrl = data.url;
            ttsServerUrl.value = currentTtsServerUrl;

            // Load saved API key if available
            const savedApiKey = localStorage.getItem('ttsApiKeySet');
            if (savedApiKey === 'true') {
                ttsApiKey.placeholder = '••••••• (API key saved)';
            }

            if (data.status === 'error' || data.status === 'auth_error') {
                ttsServerAlert.classList.remove('d-none');
                ttsServerAlert.classList.remove('alert-success');
                ttsServerAlert.classList.add('alert-warning');
                ttsServerMessage.textContent = data.message;

                if (data.status === 'auth_error') {
                    ttsServerMessage.textContent = 'Authentication error with TTS server. Please check your API key.';
                }
            } else {
                ttsServerAlert.classList.remove('d-none');
                ttsServerAlert.classList.remove('alert-warning');
                ttsServerAlert.classList.add('alert-success');
                ttsServerMessage.textContent = 'TTS server is online and ready.';
                setTimeout(() => {
                    ttsServerAlert.classList.add('d-none');
                }, 3000);
            }
        })
        .catch(error => {
            console.error('Failed to check TTS server:', error);
            ttsServerAlert.classList.remove('d-none');
            ttsServerAlert.classList.remove('alert-success');
            ttsServerAlert.classList.add('alert-warning');
            ttsServerMessage.textContent = 'Failed to check TTS server status.';
        });
    }

    // Progress indicator functions
    function showProgress() {
        progressContainer.classList.remove('d-none');

@@ -444,6 +338,376 @@ function initApp() {
            progressBar.style.width = '0%';
        }, 500);
    }

    // Check TTS server status on page load if the alert element exists
    const ttsServerAlert = document.getElementById('ttsServerAlert');
    if (ttsServerAlert) {
        checkTtsServer();
    }

    // Function to check TTS server status
    function checkTtsServer() {
        const ttsServerMessage = document.getElementById('ttsServerMessage');
        const ttsServerUrl = document.getElementById('ttsServerUrl');

        fetch('/check_tts_server')
        .then(response => response.json())
        .then(data => {
            let currentTtsServerUrl = data.url;
            if (ttsServerUrl) ttsServerUrl.value = currentTtsServerUrl;

            // Load saved API key if available
            const savedApiKey = localStorage.getItem('ttsApiKeySet');
            const ttsApiKey = document.getElementById('ttsApiKey');
            if (ttsApiKey && savedApiKey === 'true') {
                ttsApiKey.placeholder = '••••••• (API key saved)';
            }

            if (ttsServerAlert && ttsServerMessage) {
                if (data.status === 'error' || data.status === 'auth_error') {
                    ttsServerAlert.classList.remove('d-none');
                    ttsServerAlert.classList.remove('alert-success');
                    ttsServerAlert.classList.add('alert-warning');
                    ttsServerMessage.textContent = data.message;

                    if (data.status === 'auth_error') {
                        ttsServerMessage.textContent = 'Authentication error with TTS server. Please check your API key.';
                    }
                } else {
                    ttsServerAlert.classList.remove('d-none');
                    ttsServerAlert.classList.remove('alert-warning');
                    ttsServerAlert.classList.add('alert-success');
                    ttsServerMessage.textContent = 'TTS server is online and ready.';
                    setTimeout(() => {
                        ttsServerAlert.classList.add('d-none');
                    }, 3000);
                }
            }
        })
        .catch(error => {
            console.error('Failed to check TTS server:', error);
            if (ttsServerAlert && ttsServerMessage) {
                ttsServerAlert.classList.remove('d-none');
                ttsServerAlert.classList.remove('alert-success');
                ttsServerAlert.classList.add('alert-warning');
                ttsServerMessage.textContent = 'Failed to check TTS server status.';
            }
        });
    }
}

// Real-time interpreter module
function initRealtimeInterpreter() {
    // DOM elements
    const realtimeBtn = document.getElementById('realtimeBtn');
    const realtimeStatusIndicator = document.getElementById('realtimeStatusIndicator');
    const sourceLanguage = document.getElementById('sourceLanguage');
    const targetLanguage = document.getElementById('targetLanguage');
    const sourceText = document.getElementById('sourceText');
    const translatedText = document.getElementById('translatedText');
    const audioPlayer = document.getElementById('audioPlayer');

    // SocketIO connection
    let socket = null;
    let mediaRecorder = null;
    let audioContext = null;
    let isInterpreting = false;
    let sessionId = null;

    // Audio processing variables
    const bufferSize = 16384; // Increased from 4096 for better speech capture
    let audioProcessor = null;
    let micStream = null;

    // Initialize the audio context
    function initAudioContext() {
        try {
            window.AudioContext = window.AudioContext || window.webkitAudioContext;
            audioContext = new AudioContext();
            return true;
        } catch (e) {
            console.error('Web Audio API is not supported in this browser', e);
            return false;
        }
    }

    // Connect to Socket.IO server
    function connectSocket() {
        if (socket) {
            return; // Already connected
        }

        // Connect to the same host where the Flask app is running
        socket = io.connect(window.location.origin, {
            forceNew: true
        });

        // Socket event handlers
        socket.on('connect', () => {
            console.log('Socket connected');
        });

        socket.on('disconnect', () => {
            console.log('Socket disconnected');
            stopInterpreting();
        });

        socket.on('error', (data) => {
            console.error('Socket error:', data.message);
            realtimeStatusIndicator.textContent = `Error: ${data.message}`;
        });

        socket.on('session_started', (data) => {
            sessionId = data.session_id;
            console.log('Interpreter session started:', sessionId);
            realtimeStatusIndicator.textContent = 'Interpreter active - listening...';
        });

        socket.on('session_ended', () => {
            console.log('Interpreter session ended');
            sessionId = null;
            realtimeStatusIndicator.textContent = 'Interpreter stopped';
        });

        socket.on('chunk_received', () => {
            // This is a confirmation that the server received our audio chunk
            // We can use this to update UI if needed
        });

        socket.on('transcription_result', (data) => {
            if (data.session_id === sessionId) {
                // Update source text with transcription
                if (sourceText.querySelector('p.text-muted')) {
                    sourceText.innerHTML = ''; // Clear placeholder
                }

                const p = document.createElement('p');
                p.textContent = data.text;
                sourceText.appendChild(p);

                // Auto-scroll to bottom
                sourceText.scrollTop = sourceText.scrollHeight;
            }
        });

        socket.on('translation_result', (data) => {
            if (data.session_id === sessionId) {
                // Update translated text
                if (translatedText.querySelector('p.text-muted')) {
                    translatedText.innerHTML = ''; // Clear placeholder
                }

                const p = document.createElement('p');
                p.textContent = data.text;
                translatedText.appendChild(p);

                // Auto-scroll to bottom
                translatedText.scrollTop = translatedText.scrollHeight;
            }
        });

        socket.on('audio_ready', (data) => {
            if (data.session_id === sessionId) {
                // Play the translated audio
                audioPlayer.src = data.audio_url;
                audioPlayer.play();
            }
        });
    }

    // Start the real-time interpreter
    function startInterpreting() {
        if (!initAudioContext()) {
            alert('Your browser does not support the Web Audio API required for the real-time interpreter.');
            return;
        }

        connectSocket();

        // Request microphone access
        navigator.mediaDevices.getUserMedia({ audio: true, video: false })
        .then((stream) => {
            micStream = stream;

            // Start a new interpreter session
            socket.emit('start_interpreter_session', {
                source_lang: sourceLanguage.value,
                target_lang: targetLanguage.value
            });

            // Setup audio processing
            const audioInput = audioContext.createMediaStreamSource(stream);
            audioProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);

            // Connect the audio processing node
            audioInput.connect(audioProcessor);
            audioProcessor.connect(audioContext.destination);
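
            // Note: ScriptProcessorNode is deprecated in the Web Audio spec in favor of
            // AudioWorklet, but it still works in current browsers and keeps this code simple.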

            // Process audio data
            let silenceStart = performance.now();
            let isSilent = true;
            const silenceThreshold = 0.005; // Lower threshold to be more sensitive to speech
            const silenceDelay = 2000; // 2 seconds of silence before stopping

            audioProcessor.onaudioprocess = function(e) {
                if (!isInterpreting) return;

                // Get audio data
                const inputData = e.inputBuffer.getChannelData(0);

                // Check for silence
                let sum = 0;
                for (let i = 0; i < inputData.length; i++) {
                    sum += Math.abs(inputData[i]);
                }
                const average = sum / inputData.length;

                // Detect speech vs silence
                if (average > silenceThreshold) {
                    if (isSilent) {
                        isSilent = false;
                        realtimeStatusIndicator.textContent = 'Interpreting...';
                    }
                    silenceStart = performance.now(); // Reset silence timer
                } else if (!isSilent && (performance.now() - silenceStart > silenceDelay)) {
                    isSilent = true;
                    realtimeStatusIndicator.textContent = 'Waiting for speech...';
                }

                // Convert buffer to WAV format
                const wavBuffer = convertToWav(inputData);

                // Send to server if not silent or within silence delay
                if (!isSilent || (performance.now() - silenceStart <= silenceDelay)) {
                    socket.emit('audio_chunk', {
                        audio: wavBuffer
                    });
                    console.log(`Sent audio chunk: ${wavBuffer.length} bytes`);
                }
            };
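
            // Each callback delivers bufferSize samples, i.e. bufferSize / sampleRate
            // seconds of audio (~0.34 s at 48 kHz), and every chunk is shipped to the
            // server as its own self-contained WAV file.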

            // Update UI
            isInterpreting = true;
            realtimeBtn.textContent = 'Stop Interpreter';
            realtimeBtn.classList.replace('btn-primary', 'btn-danger');
            realtimeStatusIndicator.textContent = 'Interpreter active - listening...';

            // Disable language selectors during interpretation
            sourceLanguage.disabled = true;
            targetLanguage.disabled = true;
        })
        .catch((error) => {
            console.error('Error accessing microphone:', error);
            realtimeStatusIndicator.textContent = 'Error: Microphone access denied';
            alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
        });
    }

    // Stop the real-time interpreter
    function stopInterpreting() {
        if (!isInterpreting) return;

        // Stop audio processing
        if (audioProcessor) {
            audioProcessor.disconnect();
            audioProcessor = null;
        }

        // Stop microphone stream
        if (micStream) {
            micStream.getTracks().forEach(track => track.stop());
            micStream = null;
        }

        // End the interpreter session
        if (socket && socket.connected) {
            socket.emit('end_interpreter_session');
        }

        // Update UI
        isInterpreting = false;
        realtimeBtn.textContent = 'Start Interpreter';
        realtimeBtn.classList.replace('btn-danger', 'btn-primary');
        realtimeStatusIndicator.textContent = 'Interpreter ready';

        // Re-enable language selectors
        sourceLanguage.disabled = false;
        targetLanguage.disabled = false;
    }

    // Convert audio buffer to WAV format
    function convertToWav(audioBuffer) {
        const sampleRate = audioContext.sampleRate;
        const numberOfChannels = 1;
        const bitsPerSample = 16;

        // Log audio properties for debugging
        console.log(`Recording audio: ${sampleRate}Hz, ${numberOfChannels} channel(s), ${audioBuffer.length} samples`);

        // Calculate sizes and create ArrayBuffers
        const dataSize = audioBuffer.length * 2; // 16-bit samples
        const totalSize = 44 + dataSize; // 44 bytes for header + data size

        const buffer = new ArrayBuffer(totalSize);
        const view = new DataView(buffer);

        // Write WAV header
        writeString(view, 0, 'RIFF');
        view.setUint32(4, 36 + dataSize, true); // File size - 8
        writeString(view, 8, 'WAVE');
        writeString(view, 12, 'fmt ');
        view.setUint32(16, 16, true); // Format chunk length
        view.setUint16(20, 1, true); // PCM format
        view.setUint16(22, numberOfChannels, true);
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, sampleRate * numberOfChannels * (bitsPerSample / 8), true); // Byte rate
        view.setUint16(32, numberOfChannels * (bitsPerSample / 8), true); // Block align
        view.setUint16(34, bitsPerSample, true);
        writeString(view, 36, 'data');
        view.setUint32(40, dataSize, true);
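
        // The 44 bytes above form the standard PCM WAV header: a RIFF chunk,
        // a 16-byte "fmt " subchunk, and the "data" subchunk length.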

        // Write audio data
        let offset = 44;
        for (let i = 0; i < audioBuffer.length; i++) {
            // Convert float to int16
            const s = Math.max(-1, Math.min(1, audioBuffer[i]));
            const val = s < 0 ? s * 0x8000 : s * 0x7FFF;
            view.setInt16(offset, val, true);
            offset += 2;
        }

        // Helper function to write strings to DataView
        function writeString(view, offset, string) {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        }

        // Log the created buffer size
        console.log(`Created WAV buffer: ${buffer.byteLength} bytes`);

        return new Uint8Array(buffer);
    }

    // Toggle interpreter on button click
    realtimeBtn.addEventListener('click', function() {
        if (isInterpreting) {
            stopInterpreting();
        } else {
            startInterpreting();
        }
    });

    // Cleanup function for when the page is unloaded
    window.addEventListener('beforeunload', function() {
        stopInterpreting();
        if (socket) {
            socket.disconnect();
        }
    });

    // Initialize status
    realtimeStatusIndicator.textContent = 'Interpreter ready';
}

// IndexedDB functions for offline data storage

templates/index.html

@@ -11,6 +11,8 @@
    <link rel="apple-touch-icon" sizes="152x152" href="/static/icons/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/static/icons/apple-icon-180x180.png">
    <link rel="apple-touch-icon" sizes="167x167" href="/static/icons/apple-icon-167x167.png">
    <!-- Add Socket.IO client library -->
    <script src="https://cdn.socket.io/4.6.0/socket.io.min.js"></script>
    <style>
        body {
            padding-top: 20px;

@@ -74,6 +76,8 @@
            background-color: #f8f9fa;
            border-radius: 10px;
            margin-bottom: 15px;
            overflow-y: auto;
            max-height: 300px;
        }
        .btn-action {
            border-radius: 10px;

@@ -191,290 +195,48 @@
        </button>
    </div>

    <!-- Add real-time interpreter button -->
    <div class="text-center mt-3">
        <button id="realtimeBtn" class="btn btn-primary">
            <i class="fas fa-language"></i> Start Interpreter
        </button>
        <p class="status-indicator" id="realtimeStatusIndicator">Interpreter ready</p>
    </div>

    <div class="mt-3">
        <div class="progress d-none" id="progressContainer">
            <div id="progressBar" class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" style="width: 0%"></div>
        </div>
    </div>

    <!-- Add explanation of real-time interpreter mode -->
    <div class="row mt-4">
        <div class="col-md-12">
            <div class="card">
                <div class="card-header bg-info text-white">
                    <h5 class="mb-0">Real-time Interpretation Mode</h5>
                </div>
                <div class="card-body">
                    <p>This mode enables continuous, real-time interpretation as you speak:</p>
                    <ul>
                        <li><strong>Start Interpreter</strong> - Begin continuous speech interpretation</li>
                        <li><strong>Stop Interpreter</strong> - End the real-time interpretation session</li>
                    </ul>
                    <p class="text-muted">
                        <small>
                            The interpreter will automatically detect speech, transcribe, translate, and speak the translation with minimal delay.
                            Pause detection automatically handles natural breaks in conversation.
                        </small>
                    </p>
                </div>
            </div>
        </div>
    </div>

    <audio id="audioPlayer" style="display: none;"></audio>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
// DOM elements
|
||||
const recordBtn = document.getElementById('recordBtn');
|
||||
const translateBtn = document.getElementById('translateBtn');
|
||||
const sourceText = document.getElementById('sourceText');
|
||||
const translatedText = document.getElementById('translatedText');
|
||||
const sourceLanguage = document.getElementById('sourceLanguage');
|
||||
const targetLanguage = document.getElementById('targetLanguage');
|
||||
const playSource = document.getElementById('playSource');
|
||||
const playTranslation = document.getElementById('playTranslation');
|
||||
const clearSource = document.getElementById('clearSource');
|
||||
const clearTranslation = document.getElementById('clearTranslation');
|
||||
const statusIndicator = document.getElementById('statusIndicator');
|
||||
const progressContainer = document.getElementById('progressContainer');
|
||||
const progressBar = document.getElementById('progressBar');
|
||||
const audioPlayer = document.getElementById('audioPlayer');
|
||||
|
||||
// Set initial values
|
||||
let isRecording = false;
|
||||
let mediaRecorder = null;
|
||||
let audioChunks = [];
|
||||
let currentSourceText = '';
|
||||
let currentTranslationText = '';
|
||||
|
||||
// Make sure target language is different from source
|
||||
if (targetLanguage.options[0].value === sourceLanguage.value) {
|
||||
targetLanguage.selectedIndex = 1;
|
||||
}
|
||||
|
||||
// Event listeners for language selection
|
||||
sourceLanguage.addEventListener('change', function() {
|
||||
if (targetLanguage.value === sourceLanguage.value) {
|
||||
for (let i = 0; i < targetLanguage.options.length; i++) {
|
||||
if (targetLanguage.options[i].value !== sourceLanguage.value) {
|
||||
targetLanguage.selectedIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
targetLanguage.addEventListener('change', function() {
|
||||
if (targetLanguage.value === sourceLanguage.value) {
|
||||
for (let i = 0; i < sourceLanguage.options.length; i++) {
|
||||
if (sourceLanguage.options[i].value !== targetLanguage.value) {
|
||||
sourceLanguage.selectedIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Record button click event
|
||||
recordBtn.addEventListener('click', function() {
|
||||
if (isRecording) {
|
||||
stopRecording();
|
||||
} else {
|
||||
startRecording();
|
||||
}
|
||||
});
|
||||
|
||||
// Function to start recording
|
||||
function startRecording() {
|
||||
navigator.mediaDevices.getUserMedia({ audio: true })
|
||||
.then(stream => {
|
||||
mediaRecorder = new MediaRecorder(stream);
|
||||
audioChunks = [];
|
||||
|
||||
mediaRecorder.addEventListener('dataavailable', event => {
|
||||
audioChunks.push(event.data);
|
||||
});
|
||||
|
||||
mediaRecorder.addEventListener('stop', () => {
|
||||
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
|
||||
transcribeAudio(audioBlob);
|
||||
});
|
||||
|
||||
mediaRecorder.start();
|
||||
isRecording = true;
|
||||
recordBtn.classList.add('recording');
|
||||
recordBtn.classList.replace('btn-primary', 'btn-danger');
|
||||
recordBtn.innerHTML = '<i class="fas fa-stop"></i>';
|
||||
statusIndicator.textContent = 'Recording... Click to stop';
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error accessing microphone:', error);
|
||||
alert('Error accessing microphone. Please make sure you have given permission for microphone access.');
|
||||
});
|
||||
}
|
||||
|
||||
// Function to stop recording
|
||||
function stopRecording() {
|
||||
mediaRecorder.stop();
|
||||
isRecording = false;
|
||||
recordBtn.classList.remove('recording');
|
||||
recordBtn.classList.replace('btn-danger', 'btn-primary');
|
||||
recordBtn.innerHTML = '<i class="fas fa-microphone"></i>';
|
||||
statusIndicator.textContent = 'Processing audio...';
|
||||
|
||||
// Stop all audio tracks
|
||||
mediaRecorder.stream.getTracks().forEach(track => track.stop());
|
||||
}
|
||||
|
||||
// Function to transcribe audio
|
||||
function transcribeAudio(audioBlob) {
|
||||
const formData = new FormData();
|
||||
formData.append('audio', audioBlob);
|
||||
formData.append('source_lang', sourceLanguage.value);
|
||||
|
||||
showProgress();
|
||||
|
||||
fetch('/transcribe', {
|
||||
method: 'POST',
|
||||
body: formData
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
hideProgress();
|
||||
|
||||
if (data.success) {
|
||||
currentSourceText = data.text;
|
||||
sourceText.innerHTML = `<p>${data.text}</p>`;
|
||||
playSource.disabled = false;
|
||||
translateBtn.disabled = false;
|
||||
statusIndicator.textContent = 'Transcription complete';
|
||||
} else {
|
||||
sourceText.innerHTML = `<p class="text-danger">Error: ${data.error}</p>`;
|
||||
statusIndicator.textContent = 'Transcription failed';
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
hideProgress();
|
||||
console.error('Transcription error:', error);
|
||||
sourceText.innerHTML = `<p class="text-danger">Failed to transcribe audio. Please try again.</p>`;
|
||||
statusIndicator.textContent = 'Transcription failed';
|
||||
});
|
||||
}
|
||||
|
||||
// Translate button click event
|
||||
translateBtn.addEventListener('click', function() {
|
||||
if (!currentSourceText) {
|
||||
return;
|
||||
}
|
||||
|
||||
statusIndicator.textContent = 'Translating...';
|
||||
showProgress();
|
||||
|
||||
fetch('/translate', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
text: currentSourceText,
|
||||
source_lang: sourceLanguage.value,
|
||||
target_lang: targetLanguage.value
|
||||
})
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
hideProgress();
|
||||
|
||||
if (data.success) {
|
||||
currentTranslationText = data.translation;
|
||||
translatedText.innerHTML = `<p>${data.translation}</p>`;
|
||||
playTranslation.disabled = false;
|
||||
statusIndicator.textContent = 'Translation complete';
|
||||
} else {
|
||||
translatedText.innerHTML = `<p class="text-danger">Error: ${data.error}</p>`;
|
||||
statusIndicator.textContent = 'Translation failed';
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
hideProgress();
|
||||
console.error('Translation error:', error);
|
||||
translatedText.innerHTML = `<p class="text-danger">Failed to translate. Please try again.</p>`;
|
||||
statusIndicator.textContent = 'Translation failed';
|
||||
});
|
||||
});
|
||||
|
||||
// Play source text
|
||||
playSource.addEventListener('click', function() {
|
||||
if (!currentSourceText) return;
|
||||
|
||||
playAudio(currentSourceText, sourceLanguage.value);
|
||||
statusIndicator.textContent = 'Playing source audio...';
|
||||
});
|
||||
|
||||
// Play translation
|
||||
playTranslation.addEventListener('click', function() {
|
||||
if (!currentTranslationText) return;
|
||||
|
||||
playAudio(currentTranslationText, targetLanguage.value);
|
||||
statusIndicator.textContent = 'Playing translation audio...';
|
||||
});
|
||||
|
||||
// Function to play audio via TTS
|
||||
function playAudio(text, language) {
|
||||
showProgress();
|
||||
|
||||
fetch('/speak', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
text: text,
|
||||
language: language
|
||||
})
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
hideProgress();
|
||||
|
||||
if (data.success) {
|
||||
audioPlayer.src = data.audio_url;
|
||||
audioPlayer.onended = function() {
|
||||
statusIndicator.textContent = 'Ready';
|
||||
};
|
||||
audioPlayer.play();
|
||||
} else {
|
||||
statusIndicator.textContent = 'TTS failed';
|
||||
alert('Failed to play audio: ' + data.error);
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
hideProgress();
|
||||
console.error('TTS error:', error);
|
||||
statusIndicator.textContent = 'TTS failed';
|
||||
});
|
||||
}
|
||||
|
||||
// Clear buttons
|
||||
clearSource.addEventListener('click', function() {
|
||||
sourceText.innerHTML = '<p class="text-muted">Your transcribed text will appear here...</p>';
|
||||
currentSourceText = '';
|
||||
playSource.disabled = true;
|
||||
translateBtn.disabled = true;
|
||||
});
|
||||
|
||||
clearTranslation.addEventListener('click', function() {
|
||||
translatedText.innerHTML = '<p class="text-muted">Translation will appear here...</p>';
|
||||
currentTranslationText = '';
|
||||
playTranslation.disabled = true;
|
||||
});
|
||||
|
||||
// Progress indicator functions
|
||||
function showProgress() {
|
||||
progressContainer.classList.remove('d-none');
|
||||
let progress = 0;
|
||||
const interval = setInterval(() => {
|
||||
progress += 5;
|
||||
if (progress > 90) {
|
||||
clearInterval(interval);
|
||||
}
|
||||
progressBar.style.width = `${progress}%`;
|
||||
}, 100);
|
||||
progressBar.dataset.interval = interval;
|
||||
}
|
||||
|
||||
function hideProgress() {
|
||||
const interval = progressBar.dataset.interval;
|
||||
if (interval) {
|
||||
clearInterval(Number(interval));
|
||||
}
|
||||
progressBar.style.width = '100%';
|
||||
setTimeout(() => {
|
||||
progressContainer.classList.add('d-none');
|
||||
progressBar.style.width = '0%';
|
||||
}, 500);
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<script src="/static/js/app.js"></script>
|
||||
</body>
|
||||
</html>
|
||||