#!/bin/bash
#
# Scaffold the Voice Language Translator project in the current directory.
#
# Writes templates/index.html, app.py, requirements.txt and README.md, and
# creates the static/css and static/js directories. Existing files with the
# same names are overwritten.

# Stop at the first failing command instead of reporting success on a
# half-written project.
set -euo pipefail

# Create necessary directories
mkdir -p templates static/{css,js}

# Move HTML template to templates directory
# NOTE(review): the template below contains only its visible text; the HTML
# markup (language selectors, buttons, scripts) appears to be missing from
# this copy of the script and must be restored before the UI can work.
cat > templates/index.html << 'EOL'
Voice Language Translator

Voice Language Translator

Powered by Gemma 3, Whisper & Edge TTS

Source

Your transcribed text will appear here...

Translation

Translation will appear here...

Click to start recording

EOL

# Create app.py
cat > app.py << 'EOL'
#!/usr/bin/env python3
"""Flask backend for the Voice Language Translator.

Request flow: Whisper transcribes uploaded audio, Gemma 3 (served by Ollama)
translates the text, and an OpenAI-compatible Edge TTS server synthesizes
speech for the result.
"""
import json
import logging
import os
import tempfile
import time
import uuid

import ollama
import requests
import torch
import whisper
from flask import (
    Flask,
    Response,
    jsonify,
    render_template,
    request,
    send_file,
    send_from_directory,
)
from werkzeug.exceptions import NotFound

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = tempfile.mkdtemp()
app.config['TTS_SERVER'] = os.environ.get('TTS_SERVER_URL', 'http://localhost:5050/v1/audio/speech')
app.config['TTS_API_KEY'] = os.environ.get('TTS_API_KEY', 'your_api_key_here')

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Endpoint paths that may terminate TTS_SERVER; stripping one yields the
# server root used for the status probe.
TTS_ENDPOINT_SUFFIXES = ('/api/generate', '/v1/audio/speech')


def _tts_base_url(endpoint_url):
    """Return the TTS server root by removing a known endpoint path.

    URLs that contain none of the known endpoint paths are returned
    unchanged.
    """
    for suffix in TTS_ENDPOINT_SUFFIXES:
        if suffix in endpoint_url:
            return endpoint_url.rsplit(suffix, 1)[0]
    return endpoint_url


# Add a route to check TTS server status
@app.route('/check_tts_server', methods=['GET'])
def check_tts_server():
    """Report whether the configured TTS server answers its status probe.

    Always responds with HTTP 200; the JSON ``status`` field is ``online``
    or ``error``.
    """
    tts_url = app.config['TTS_SERVER']
    # NOTE(review): assumes the TTS server exposes GET /status - confirm.
    status_url = _tts_base_url(tts_url) + '/status'
    try:
        # Try a simple HTTP request to the TTS server
        response = requests.get(status_url, timeout=5)
    except requests.exceptions.RequestException as e:
        return jsonify({
            'status': 'error',
            'message': f'Cannot connect to TTS server: {str(e)}',
            'url': tts_url
        })

    if response.status_code == 200:
        return jsonify({
            'status': 'online',
            'url': tts_url
        })
    return jsonify({
        'status': 'error',
        'message': f'TTS server returned status code {response.status_code}',
        'url': tts_url
    })


# Load Whisper model
logger.info("Loading Whisper model...")
whisper_model = whisper.load_model("base")
logger.info("Whisper model loaded successfully")

# Supported languages
SUPPORTED_LANGUAGES = {
    "ar": "Arabic",
    "hy": "Armenian",
    "az": "Azerbaijani",
    "en": "English",
    "fr": "French",
    "ka": "Georgian",
    "kk": "Kazakh",
    "zh": "Mandarin",
    "fa": "Farsi",
    "pt": "Portuguese",
    "ru": "Russian",
    "es": "Spanish",
    "tr": "Turkish",
    "uz": "Uzbek"
}

# Map language names to language codes
LANGUAGE_TO_CODE = {v: k for k, v in SUPPORTED_LANGUAGES.items()}

# Map language names to OpenAI TTS voice options.
# Using OpenAI general voices, as OpenAI doesn't have specific voices for all
# these languages. We'll use the available voices and rely on the translation
# being in the correct language text.
LANGUAGE_TO_VOICE = {
    "Arabic": "alloy",
    "Armenian": "echo",
    "Azerbaijani": "nova",
    "English": "echo",
    "French": "alloy",
    "Georgian": "fable",
    "Kazakh": "onyx",
    "Mandarin": "shimmer",
    "Farsi": "nova",
    "Portuguese": "alloy",
    "Russian": "echo",
    "Spanish": "nova",
    "Turkish": "fable",
    "Uzbek": "onyx"
}


@app.route('/')
def index():
    """Render the main page with the alphabetized list of language names."""
    return render_template('index.html', languages=sorted(SUPPORTED_LANGUAGES.values()))


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe the uploaded ``audio`` file with Whisper.

    Form fields: ``audio`` (file, required) and ``source_lang`` (language
    name, optional; unknown names let Whisper auto-detect the language).
    Returns JSON ``{'success': True, 'text': ...}``, or an ``error`` message
    with status 400/500.
    """
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400

    audio_file = request.files['audio']
    source_lang = request.form.get('source_lang', '')

    # A unique file per request: a shared fixed name would let concurrent
    # uploads overwrite or delete each other's audio.
    fd, temp_path = tempfile.mkstemp(
        prefix='input_audio_', suffix='.wav', dir=app.config['UPLOAD_FOLDER']
    )
    os.close(fd)

    try:
        # Save the audio file temporarily
        audio_file.save(temp_path)

        # Use Whisper for transcription
        result = whisper_model.transcribe(
            temp_path,
            language=LANGUAGE_TO_CODE.get(source_lang)
        )
        return jsonify({
            'success': True,
            'text': result["text"]
        })
    except Exception as e:
        logger.exception("Transcription error: %s", e)
        return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
    finally:
        # Clean up the temporary file
        if os.path.exists(temp_path):
            os.remove(temp_path)


@app.route('/translate', methods=['POST'])
def translate():
    """Translate ``text`` from ``source_lang`` to ``target_lang`` with Gemma 3.

    Expects a JSON body with the three keys above. Returns JSON
    ``{'success': True, 'translation': ...}``, or an ``error`` message with
    status 400 (missing parameters) or 500 (model failure).
    """
    try:
        # silent=True: a missing or non-JSON body becomes a 400 below
        # instead of an exception reported as a 500.
        data = request.get_json(silent=True) or {}
        text = data.get('text', '')
        source_lang = data.get('source_lang', '')
        target_lang = data.get('target_lang', '')

        if not text or not source_lang or not target_lang:
            return jsonify({'error': 'Missing required parameters'}), 400

        # Create a prompt for Gemma 3 translation
        prompt = (
            f'\nTranslate the following text from {source_lang} to {target_lang}:\n'
            f'"{text}"\n'
            'Provide only the translation without any additional text.\n'
        )

        # Use Ollama to interact with Gemma 3
        response = ollama.chat(
            model="gemma3",
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        )
        translated_text = response['message']['content'].strip()

        return jsonify({
            'success': True,
            'translation': translated_text
        })
    except Exception as e:
        logger.exception("Translation error: %s", e)
        return jsonify({'error': f'Translation failed: {str(e)}'}), 500


@app.route('/speak', methods=['POST'])
def speak():
    """Synthesize ``text`` in ``language`` through the TTS server.

    Expects a JSON body with ``text`` and ``language`` (a language name).
    On success the MP3 is stored in the upload folder and the response is
    ``{'success': True, 'audio_url': '/get_audio/<file>'}``. Errors return
    an ``error`` message with status 400 or 500.
    """
    try:
        data = request.get_json(silent=True) or {}
        text = data.get('text', '')
        language = data.get('language', '')

        if not text or not language:
            return jsonify({'error': 'Missing required parameters'}), 400

        voice = LANGUAGE_TO_VOICE.get(language)
        if not voice:
            return jsonify({'error': 'Unsupported language for TTS'}), 400

        # Get TTS server URL from environment or config
        tts_server_url = app.config['TTS_SERVER']

        # Send the configured key as a Bearer token.
        api_key = app.config['TTS_API_KEY']
        headers = {'Authorization': f'Bearer {api_key}'} if api_key else {}

        # OpenAI-compatible speech endpoints read `input`/`response_format`;
        # the `text`/`output_format` keys are kept for servers that relied
        # on the previous payload.
        payload = {
            'model': 'tts-1',
            'input': text,
            'voice': voice,
            'response_format': 'mp3',
            'text': text,
            'output_format': 'mp3'
        }

        try:
            # Request TTS from the Edge TTS server
            logger.info("Sending TTS request to %s", tts_server_url)
            tts_response = requests.post(
                tts_server_url,
                json=payload,
                headers=headers,
                timeout=10  # Add timeout
            )
            logger.info("TTS response status: %s", tts_response.status_code)

            if tts_response.status_code != 200:
                error_msg = f'TTS request failed with status {tts_response.status_code}'
                logger.error(error_msg)
                # Try to get error details from response if possible
                try:
                    logger.error("Error details: %s", tts_response.json())
                except ValueError:
                    # Body was not JSON; the status code above is all we have.
                    pass
                return jsonify({'error': error_msg}), 500

            # The response contains the audio data directly. A UUID keeps
            # requests that land in the same second from overwriting each
            # other's output.
            # NOTE(review): generated files are never removed from the
            # upload folder.
            audio_name = f'output_{uuid.uuid4().hex}.mp3'
            temp_audio_path = os.path.join(app.config['UPLOAD_FOLDER'], audio_name)
            with open(temp_audio_path, 'wb') as f:
                f.write(tts_response.content)

            return jsonify({
                'success': True,
                'audio_url': f'/get_audio/{audio_name}'
            })
        except requests.exceptions.RequestException as e:
            error_msg = f'Failed to connect to TTS server: {str(e)}'
            logger.error(error_msg)
            return jsonify({'error': error_msg}), 500
    except Exception as e:
        logger.exception("TTS error: %s", e)
        return jsonify({'error': f'TTS failed: {str(e)}'}), 500


@app.route('/get_audio/<filename>')
def get_audio(filename):
    """Serve a generated MP3 from the upload folder.

    Returns 404 when the file does not exist or the name would escape the
    upload folder, and 500 for any other failure.
    """
    try:
        # send_from_directory rejects names that resolve outside the folder.
        return send_from_directory(
            app.config['UPLOAD_FOLDER'], filename, mimetype='audio/mpeg'
        )
    except NotFound:
        return jsonify({'error': 'Audio file not found'}), 404
    except Exception as e:
        logger.exception("Audio retrieval error: %s", e)
        return jsonify({'error': f'Audio retrieval failed: {str(e)}'}), 500


if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger
    # (which allows arbitrary code execution) must be opt-in.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=8000, debug=debug_enabled)
EOL

# Create requirements.txt
cat > requirements.txt << 'EOL'
flask==2.3.2
requests==2.31.0
openai-whisper==20231117
torch==2.1.0
ollama==0.1.5
EOL

# Create README.md
cat > README.md << 'EOL'
# Voice Language Translator

A mobile-friendly web application that translates spoken language between multiple languages using:
- Gemma 3 open-source LLM via Ollama for translation
- OpenAI Whisper for speech-to-text
- OpenAI Edge TTS for text-to-speech

## Supported Languages
- Arabic
- Armenian
- Azerbaijani
- English
- French
- Georgian
- Kazakh
- Mandarin
- Farsi
- Portuguese
- Russian
- Spanish
- Turkish
- Uzbek

## Setup Instructions

1. Install the required Python packages:
   ```
   pip install -r requirements.txt
   ```

2. Make sure you have Ollama installed and the Gemma 3 model loaded:
   ```
   ollama pull gemma3
   ```

3. Ensure your OpenAI Edge TTS server is running on port 5050.

4. Run the application:
   ```
   python app.py
   ```

5. Open your browser and navigate to:
   ```
   http://localhost:8000
   ```

## Usage

1. Select your source language from the dropdown menu
2. Press the microphone button and speak
3. Press the button again to stop recording
4. Wait for the transcription to complete
5. Select your target language
6. Press the "Translate" button
7. Use the play buttons to hear the original or translated text

## Technical Details

- The app uses Flask for the web server
- Audio is processed client-side using the MediaRecorder API
- Whisper for speech recognition with language hints
- Ollama provides access to the Gemma 3 model for translation
- OpenAI Edge TTS delivers natural-sounding speech output

## Mobile Support

The interface is fully responsive and designed to work well on mobile devices.
EOL

# Make the script executable
chmod +x app.py

echo "Setup complete! Run the app with: python app.py"