Improve audio handling compatibility
- Add support for different audio data formats
- Ensure proper .wav extension for Whisper compatibility
- Add file size logging for debugging
- Fix data format conversion between client and server
This commit is contained in:
parent
378f976b76
commit
4bd56cbe5b
28
app.py
28
app.py
@@ -429,6 +429,18 @@ def setup_realtime_interpreter():
|
|||||||
|
|
||||||
# Use whisper to transcribe
|
# Use whisper to transcribe
|
||||||
try:
|
try:
|
||||||
|
# Check if file is valid
|
||||||
|
file_size = os.path.getsize(temp_path)
|
||||||
|
logger.info(f"File size: {file_size} bytes")
|
||||||
|
|
||||||
|
# Enforce .wav extension for compatibility
|
||||||
|
new_temp_path = temp_path
|
||||||
|
if not temp_path.lower().endswith('.wav'):
|
||||||
|
new_temp_path = temp_path + '.wav'
|
||||||
|
os.rename(temp_path, new_temp_path)
|
||||||
|
logger.info(f"Renamed audio file to ensure .wav extension: {new_temp_path}")
|
||||||
|
temp_path = new_temp_path
|
||||||
|
|
||||||
result = whisper_model.transcribe(
|
result = whisper_model.transcribe(
|
||||||
temp_path,
|
temp_path,
|
||||||
language=LANGUAGE_TO_CODE.get(source_lang, None)
|
language=LANGUAGE_TO_CODE.get(source_lang, None)
|
||||||
@@ -626,6 +638,22 @@ def handle_audio_chunk(data):
|
|||||||
emit('error', {'message': 'No audio data received'})
|
emit('error', {'message': 'No audio data received'})
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Convert audio data to proper format if needed
|
||||||
|
if isinstance(audio_data, list):
|
||||||
|
# Convert list to bytes
|
||||||
|
audio_data = bytes(audio_data)
|
||||||
|
logger.info(f"Converted audio data from list to bytes: {len(audio_data)} bytes")
|
||||||
|
elif isinstance(audio_data, str):
|
||||||
|
# This might be base64 encoded data
|
||||||
|
try:
|
||||||
|
import base64
|
||||||
|
audio_data = base64.b64decode(audio_data.split(',')[1] if ',' in audio_data else audio_data)
|
||||||
|
logger.info(f"Decoded base64 audio data: {len(audio_data)} bytes")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error decoding audio data: {str(e)}")
|
||||||
|
emit('error', {'message': 'Invalid audio data format'})
|
||||||
|
return
|
||||||
|
|
||||||
# Get session details
|
# Get session details
|
||||||
source_lang = active_sessions[request.sid]['source_lang']
|
source_lang = active_sessions[request.sid]['source_lang']
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user