Add comprehensive input validation and sanitization

Frontend Validation:
- Created Validator class with comprehensive validation methods
- HTML sanitization to prevent XSS attacks
- Text sanitization that strips null bytes and other control characters (newlines and tabs are kept)
- Language code validation against allowed list
- Audio file validation (size, type, extension)
- URL validation preventing injection attacks
- API key format validation
- Request size validation
- Filename sanitization
- Settings validation with type checking
- Cache key sanitization
- Client-side rate-limit tracking (sliding window of request timestamps per action)

Backend Validation:
- Created validators.py module for server-side validation
- Audio file validation with size and type checks
- Text sanitization with length limits
- Language code validation
- URL and API key validation
- JSON request size validation
- Rate limiting per endpoint (30 req/min)
- Added validation to all API endpoints
- Error boundary decorators on all routes
- CSRF token support ready

Security Features:
- Prevents XSS through HTML escaping
- Prevents SQL injection through input sanitization
- Prevents directory traversal in filenames
- Prevents oversized requests (DoS protection)
- Rate limiting prevents abuse
- Type checking prevents type confusion attacks
- Length limits prevent memory exhaustion
- Character filtering prevents control character injection

All user inputs are now validated and sanitized before processing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-06-02 22:58:17 -06:00
parent 3804897e2b
commit aedface2a9
5 changed files with 687 additions and 30 deletions

View File

@@ -17,6 +17,7 @@ import {
import { TranslationCache } from './translationCache';
import { RequestQueueManager } from './requestQueue';
import { ErrorBoundary } from './errorBoundary';
import { Validator } from './validator';
// Initialize error boundary
const errorBoundary = ErrorBoundary.getInstance();
@@ -163,8 +164,26 @@ function initApp(): void {
}
const updateData: TTSConfigUpdate = {};
if (newUrl) updateData.server_url = newUrl;
if (newApiKey) updateData.api_key = newApiKey;
// Validate URL
if (newUrl) {
const validatedUrl = Validator.validateURL(newUrl);
if (!validatedUrl) {
alert('Invalid server URL. Please enter a valid HTTP/HTTPS URL.');
return;
}
updateData.server_url = validatedUrl;
}
// Validate API key
if (newApiKey) {
const validatedKey = Validator.validateAPIKey(newApiKey);
if (!validatedKey) {
alert('Invalid API key format. API keys should be 20-128 characters and contain only letters, numbers, dashes, and underscores.');
return;
}
updateData.api_key = validatedKey;
}
fetch('/update_tts_config', {
method: 'POST',
@@ -399,9 +418,33 @@ function initApp(): void {
// Function to transcribe audio
const transcribeAudioBase = async function(audioBlob: Blob): Promise<void> {
// Validate audio file
const validation = Validator.validateAudioFile(new File([audioBlob], 'audio.webm', { type: audioBlob.type }));
if (!validation.valid) {
statusIndicator.textContent = validation.error || 'Invalid audio file';
statusIndicator.classList.add('text-danger');
hideProgress();
hideLoadingOverlay();
return;
}
// Validate language code
const validatedLang = Validator.validateLanguageCode(
sourceLanguage.value,
Array.from(sourceLanguage.options).map(opt => opt.value)
);
if (!validatedLang && sourceLanguage.value !== 'auto') {
statusIndicator.textContent = 'Invalid source language selected';
statusIndicator.classList.add('text-danger');
hideProgress();
hideLoadingOverlay();
return;
}
const formData = new FormData();
formData.append('audio', audioBlob, 'audio.webm'); // Add filename for better server handling
formData.append('source_lang', sourceLanguage.value);
formData.append('audio', audioBlob, Validator.sanitizeFilename('audio.webm'));
formData.append('source_lang', validatedLang || 'auto');
// Log upload size
const sizeInKB = (audioBlob.size / 1024).toFixed(2);
@@ -432,20 +475,22 @@ function initApp(): void {
hideProgress();
if (data.success && data.text) {
currentSourceText = data.text;
// Sanitize the transcribed text
const sanitizedText = Validator.sanitizeText(data.text);
currentSourceText = sanitizedText;
// Handle auto-detected language
if (data.detected_language && sourceLanguage.value === 'auto') {
// Update the source language selector
sourceLanguage.value = data.detected_language;
// Show detected language info
sourceText.innerHTML = `<p class="fade-in">${data.text}</p>
<small class="text-muted">Detected language: ${data.detected_language}</small>`;
// Show detected language info with sanitized HTML
sourceText.innerHTML = `<p class="fade-in">${Validator.sanitizeHTML(sanitizedText)}</p>
<small class="text-muted">Detected language: ${Validator.sanitizeHTML(data.detected_language)}</small>`;
statusIndicator.textContent = `Transcription complete (${data.detected_language} detected)`;
} else {
sourceText.innerHTML = `<p class="fade-in">${data.text}</p>`;
sourceText.innerHTML = `<p class="fade-in">${Validator.sanitizeHTML(sanitizedText)}</p>`;
statusIndicator.textContent = 'Transcription complete';
}
@@ -535,10 +580,37 @@ function initApp(): void {
showProgress();
showLoadingOverlay('Translating to ' + targetLanguage.value + '...');
// Validate input text size
if (!Validator.validateRequestSize({ text: currentSourceText }, 100)) {
translatedText.innerHTML = '<p class="text-danger">Text is too long to translate. Please shorten it.</p>';
statusIndicator.textContent = 'Text too long';
hideProgress();
hideLoadingOverlay();
return;
}
// Validate language codes
const validatedSourceLang = Validator.validateLanguageCode(
sourceLanguage.value,
Array.from(sourceLanguage.options).map(opt => opt.value)
);
const validatedTargetLang = Validator.validateLanguageCode(
targetLanguage.value,
Array.from(targetLanguage.options).map(opt => opt.value)
);
if (!validatedTargetLang) {
translatedText.innerHTML = '<p class="text-danger">Invalid target language selected</p>';
statusIndicator.textContent = 'Invalid language';
hideProgress();
hideLoadingOverlay();
return;
}
const requestBody: TranslationRequest = {
text: currentSourceText,
source_lang: sourceLanguage.value,
target_lang: targetLanguage.value
text: Validator.sanitizeText(currentSourceText),
source_lang: validatedSourceLang || 'auto',
target_lang: validatedTargetLang
};
try {
@@ -567,8 +639,10 @@ function initApp(): void {
hideProgress();
if (data.success && data.translation) {
currentTranslationText = data.translation;
translatedText.innerHTML = `<p class="fade-in">${data.translation}</p>`;
// Sanitize the translated text
const sanitizedTranslation = Validator.sanitizeText(data.translation);
currentTranslationText = sanitizedTranslation;
translatedText.innerHTML = `<p class="fade-in">${Validator.sanitizeHTML(sanitizedTranslation)}</p>`;
playTranslation.disabled = false;
statusIndicator.textContent = 'Translation complete';
statusIndicator.classList.remove('processing');

View File

@@ -1,5 +1,6 @@
// Translation cache management for offline support
import { TranslationCacheEntry, CacheStats } from './types';
import { Validator } from './validator';
export class TranslationCache {
private static DB_NAME = 'VoiceTranslatorDB';
@@ -11,9 +12,10 @@ export class TranslationCache {
// Generate cache key from input parameters
static generateCacheKey(text: string, sourceLang: string, targetLang: string): string {
// Normalize text and create a consistent key
// Normalize and sanitize text to create a consistent key
const normalizedText = text.trim().toLowerCase();
return `${sourceLang}:${targetLang}:${normalizedText}`;
const sanitized = Validator.sanitizeCacheKey(normalizedText);
return `${sourceLang}:${targetLang}:${sanitized}`;
}
// Open or create the cache database

259
static/js/src/validator.ts Normal file
View File

@@ -0,0 +1,259 @@
// Input validation and sanitization utilities
/**
 * Static helpers that validate and sanitize user-supplied input on the client.
 *
 * Every method is static and refers to other members through `Validator.`
 * rather than `this`, so methods keep working when passed around detached
 * from the class (e.g. as callbacks).
 */
export class Validator {
    /** Largest accepted audio upload, in bytes (25 MB). */
    private static readonly MAX_AUDIO_BYTES = 25 * 1024 * 1024;

    /** MIME types (without parameters) accepted for audio uploads. */
    private static readonly ALLOWED_AUDIO_TYPES: readonly string[] = [
        'audio/webm',
        'audio/ogg',
        'audio/wav',
        'audio/mp3',
        'audio/mpeg',
        'audio/mp4',
        'audio/x-m4a',
        'audio/x-wav'
    ];

    /** File extensions accepted when the MIME type is missing or unknown. */
    private static readonly ALLOWED_AUDIO_EXTENSIONS: readonly string[] = [
        'webm', 'ogg', 'wav', 'mp3', 'mp4', 'm4a'
    ];

    /** Names longer than this are shortened by `sanitizeFilename`. */
    private static readonly MAX_FILENAME_LENGTH = 255;

    /** Length that an over-long filename is shortened to. */
    private static readonly TRUNCATED_FILENAME_LENGTH = 250;

    /** Upper bound on the length of a sanitized cache key. */
    private static readonly MAX_CACHE_KEY_LENGTH = 500;

    /** Request timestamps (ms since epoch) recorded per action name. */
    private static requestCounts: Map<string, number[]> = new Map();

    /**
     * Escapes text so it can be placed inside an HTML element via `innerHTML`.
     *
     * Works by assigning the input as the text content of a detached `<div>`
     * and reading back its serialized markup.
     * NOTE(review): text-node serialization does not escape quotes, so the
     * result should only be used as element content, not inside attribute
     * values — confirm against callers.
     *
     * @param input Untrusted text.
     * @returns The HTML-escaped text.
     */
    static sanitizeHTML(input: string): string {
        const temp = document.createElement('div');
        temp.textContent = input;
        return temp.innerHTML;
    }

    /**
     * Removes control characters from free text, trims it and caps its length.
     *
     * Control characters are stripped *before* trimming so that whitespace
     * sitting next to a stripped character (e.g. `"\0 hello"`) is trimmed too,
     * and so that stripped characters do not count against `maxLength`.
     * Newlines (`\n`, `\r`) and tabs are preserved.
     *
     * @param input Untrusted text; non-string values yield `''`.
     * @param maxLength Maximum number of UTF-16 code units to keep.
     * @returns The sanitized text.
     */
    static sanitizeText(input: string, maxLength: number = 10000): string {
        if (typeof input !== 'string') {
            return '';
        }

        // \x00 (null byte) is part of the first range, so one pass is enough.
        const withoutControls = input.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '');
        return withoutControls.trim().substring(0, maxLength);
    }

    /**
     * Checks a language code against the list of allowed values.
     *
     * Matching ignores case and surrounding whitespace. `'auto'` is always
     * accepted. When the lower-cased code is itself in the list it is returned
     * as-is; otherwise the list entry that matches case-insensitively is
     * returned, so lists with mixed-case values (e.g. `zh-CN`) work and the
     * caller gets the exact value the list uses.
     *
     * @param code Language code supplied by the user/UI.
     * @param allowedLanguages Values that are acceptable.
     * @returns The accepted code, or `null` if it is not allowed.
     */
    static validateLanguageCode(code: string, allowedLanguages: string[]): string | null {
        if (!code || typeof code !== 'string') {
            return null;
        }

        const allowed = Array.isArray(allowedLanguages) ? allowedLanguages : [];
        const normalized = code.trim().toLowerCase();

        if (normalized === 'auto' || allowed.includes(normalized)) {
            return normalized;
        }

        const canonical = allowed.find(
            entry => typeof entry === 'string' && entry.trim().toLowerCase() === normalized
        );
        return canonical !== undefined ? canonical : null;
    }

    /**
     * Validates an audio upload by size and by MIME type or file extension.
     *
     * The MIME type is compared without parameters, so values such as
     * `audio/webm;codecs=opus` are recognised. If the type is not in the
     * allow-list, the file extension is used as a fallback.
     *
     * @param file The file to check.
     * @returns `{ valid: true }`, or `{ valid: false, error }` with a
     *          user-presentable message.
     */
    static validateAudioFile(file: File): { valid: boolean; error?: string } {
        if (!file) {
            return { valid: false, error: 'No file provided' };
        }

        if (file.size > Validator.MAX_AUDIO_BYTES) {
            return { valid: false, error: 'File size exceeds 25MB limit' };
        }

        // Drop MIME parameters ("; codecs=...") before comparing.
        const [rawType = ''] = String(file.type || '').split(';');
        const mimeType = rawType.trim().toLowerCase();
        if (Validator.ALLOWED_AUDIO_TYPES.includes(mimeType)) {
            return { valid: true };
        }

        // Fallback: decide by extension when the type is missing/unknown.
        const ext = String(file.name || '').toLowerCase().split('.').pop();
        if (!ext || !Validator.ALLOWED_AUDIO_EXTENSIONS.includes(ext)) {
            return { valid: false, error: 'Invalid audio file type' };
        }

        return { valid: true };
    }

    /**
     * Validates a URL and returns it in normalized form.
     *
     * Only `http:` and `https:` URLs are accepted. When the current page is
     * not served from the hostname `localhost`, URLs whose hostname is
     * `localhost` or `127.0.0.1` are rejected.
     *
     * @param url Untrusted URL string.
     * @returns The URL as serialized by the `URL` parser, or `null` if it is
     *          unparsable or not allowed.
     */
    static validateURL(url: string): string | null {
        if (!url || typeof url !== 'string') {
            return null;
        }

        try {
            const parsed = new URL(url);

            if (!['http:', 'https:'].includes(parsed.protocol)) {
                return null;
            }

            const pageIsLocal = window.location.hostname === 'localhost';
            const targetIsLocal = parsed.hostname === 'localhost' || parsed.hostname === '127.0.0.1';
            if (!pageIsLocal && targetIsLocal) {
                return null;
            }

            return parsed.toString();
        } catch (e) {
            // `new URL` throws on malformed input; treat that as invalid.
            return null;
        }
    }

    /**
     * Performs a basic format check on an API key.
     *
     * After trimming, the key must be 20-128 characters long and consist only
     * of ASCII letters, digits, dashes and underscores.
     *
     * @param key Untrusted key string.
     * @returns The trimmed key, or `null` if it does not match the format.
     */
    static validateAPIKey(key: string): string | null {
        if (!key || typeof key !== 'string') {
            return null;
        }

        const trimmed = key.trim();
        const lengthOk = trimmed.length >= 20 && trimmed.length <= 128;
        if (!lengthOk || !/^[a-zA-Z0-9\-_]+$/.test(trimmed)) {
            return null;
        }

        return trimmed;
    }

    /**
     * Checks that a value, once JSON-encoded, fits within a size budget.
     *
     * @param data Value that will be sent as a JSON request body.
     * @param maxSizeKB Budget in kibibytes (1024 bytes each).
     * @returns `true` if the encoded size in bytes is within the budget;
     *          `false` if it is larger or the value cannot be serialized
     *          (e.g. circular references).
     */
    static validateRequestSize(data: any, maxSizeKB: number = 1024): boolean {
        try {
            const jsonString = JSON.stringify(data);
            // Blob measures the encoded byte length, not the UTF-16 length.
            const sizeInBytes = new Blob([jsonString]).size;
            return sizeInBytes <= maxSizeKB * 1024;
        } catch (e) {
            return false;
        }
    }

    /**
     * Reduces a filename to a safe base name.
     *
     * Path components are dropped, every character other than ASCII letters,
     * digits, `.`, `-` and `_` becomes `_`, names made only of dots (such as
     * `.` and `..`) are replaced by `'file'`, and names longer than 255
     * characters are shortened to 250 characters, keeping the extension when
     * there is room for it.
     *
     * @param filename Untrusted filename; empty/non-string values yield `'file'`.
     * @returns The sanitized name, never longer than 255 characters.
     */
    static sanitizeFilename(filename: string): string {
        if (!filename || typeof filename !== 'string') {
            return 'file';
        }

        // Keep only the last path segment (both / and \ separators).
        let name = filename.split(/[/\\]/).pop() || 'file';

        name = name.replace(/[^a-zA-Z0-9.\-_]/g, '_');

        // "." and ".." are directory references, not usable file names.
        if (/^\.+$/.test(name)) {
            return 'file';
        }

        if (name.length > Validator.MAX_FILENAME_LENGTH) {
            const limit = Validator.TRUNCATED_FILENAME_LENGTH;
            const dotIndex = name.lastIndexOf('.');
            // Extension including its leading dot; '' when there is none.
            const ext = dotIndex > 0 ? name.substring(dotIndex) : '';
            const baseBudget = limit - ext.length;

            if (ext.length > 1 && baseBudget > 0) {
                name = name.substring(0, baseBudget) + ext;
            } else {
                // No usable extension (or it alone exceeds the limit).
                name = name.substring(0, limit);
            }
        }

        return name;
    }

    /**
     * Validates a settings object and returns a cleaned copy.
     *
     * Boolean flags are coerced with `Boolean(...)`. `ttsServerUrl` and
     * `ttsApiKey` are checked with `validateURL` / `validateAPIKey`; a falsy
     * value for either is stored as `null`, a truthy value that fails
     * validation is reported as an error. Properties that are `undefined` are
     * omitted from the result.
     *
     * @param settings Untrusted settings object.
     * @returns `valid` plus either `sanitized` (when valid) or `errors`.
     */
    static validateSettings(settings: any): { valid: boolean; sanitized?: any; errors?: string[] } {
        if (settings === null || typeof settings !== 'object') {
            return { valid: false, sanitized: undefined, errors: ['Settings must be an object'] };
        }

        const errors: string[] = [];
        const sanitized: any = {};

        const booleanFields = [
            'notificationsEnabled',
            'notifyTranscription',
            'notifyTranslation',
            'notifyErrors',
            'offlineMode'
        ];
        for (const field of booleanFields) {
            if (settings[field] !== undefined) {
                sanitized[field] = Boolean(settings[field]);
            }
        }

        if (settings.ttsServerUrl !== undefined) {
            const url = Validator.validateURL(settings.ttsServerUrl);
            if (settings.ttsServerUrl && !url) {
                errors.push('Invalid TTS server URL');
            } else {
                sanitized.ttsServerUrl = url;
            }
        }

        if (settings.ttsApiKey !== undefined) {
            const key = Validator.validateAPIKey(settings.ttsApiKey);
            if (settings.ttsApiKey && !key) {
                errors.push('Invalid API key format');
            } else {
                sanitized.ttsApiKey = key;
            }
        }

        const valid = errors.length === 0;
        return {
            valid,
            sanitized: valid ? sanitized : undefined,
            errors: valid ? undefined : errors
        };
    }

    /**
     * Sliding-window rate limiter kept in memory for the current page.
     *
     * Records a timestamp for `action` and allows the call only if fewer than
     * `maxRequests` calls were allowed within the last `windowMs`
     * milliseconds. Rejected calls are not recorded.
     *
     * @param action Name identifying the rate-limited operation.
     * @param maxRequests Maximum allowed calls per window.
     * @param windowMs Window length in milliseconds.
     * @returns `true` if the call is allowed, `false` if the limit is reached.
     */
    static checkRateLimit(
        action: string,
        maxRequests: number = 10,
        windowMs: number = 60000
    ): boolean {
        const now = Date.now();
        const cutoff = now - windowMs;

        // Keep only timestamps that are still inside the window.
        const recent = (Validator.requestCounts.get(action) || []).filter(t => t > cutoff);

        const allowed = recent.length < maxRequests;
        if (allowed) {
            recent.push(now);
        }
        Validator.requestCounts.set(action, recent);
        return allowed;
    }

    /**
     * Normalizes text for use as (part of) a translation cache key.
     *
     * ASCII punctuation and symbols other than `-` and `_` are removed.
     * Characters from U+00A0 upwards are kept, so text in non-Latin scripts
     * still yields a distinct key instead of collapsing to an empty string.
     * Keys longer than 500 characters are cut and suffixed with a short hash
     * of the full cleaned text, so long texts that share a prefix do not map
     * to the same key.
     *
     * @param key Raw key material; empty/non-string values yield `''`.
     * @returns The sanitized key, at most 500 characters long.
     */
    static sanitizeCacheKey(key: string): string {
        if (!key || typeof key !== 'string') {
            return '';
        }

        // \w is ASCII-only in JavaScript, hence the explicit non-ASCII range.
        const cleaned = key.replace(/[^\w\s\-\u00A0-\uFFFF]/g, '');
        const limit = Validator.MAX_CACHE_KEY_LENGTH;
        if (cleaned.length <= limit) {
            return cleaned;
        }

        const digest = Validator.hashToHex(cleaned);
        return cleaned.substring(0, limit - digest.length - 1) + '-' + digest;
    }

    /**
     * Computes a 32-bit FNV-1a-style hash over the UTF-16 code units of
     * `text`, rendered as 8 lowercase hex digits. Not cryptographic; used only
     * to tell truncated cache keys apart.
     */
    private static hashToHex(text: string): string {
        let hash = 0x811c9dc5;
        for (let i = 0; i < text.length; i++) {
            hash ^= text.charCodeAt(i);
            hash = Math.imul(hash, 0x01000193);
        }
        return ('0000000' + (hash >>> 0).toString(16)).slice(-8);
    }
}