# AI_Avatar_Chat / advanced_tts_client.py
# bravedims
# 🚨 CRITICAL FIX: Remove all Unicode characters causing Python syntax errors
# 05d082e
"""
Enhanced Advanced TTS Client with Better Dependency Handling
Fixes the 'datasets' module issue and transformers warnings
"""
import os
import logging
import torch
from pathlib import Path
from typing import Optional, Dict, Any
# Module-level logger, named after this module so that log records can be
# filtered or configured per module by the application's logging setup.
logger = logging.getLogger(__name__)
class AdvancedTTSClient:
    """Advanced TTS client with robust optional-dependency handling.

    On construction the client selects a torch device and probes whether the
    optional ``transformers`` and ``datasets`` packages can be imported.
    Models are only loaded when both are present; otherwise the client
    reports "Fallback Mode" through :meth:`get_model_info`.

    Attributes:
        device: ``"cuda"`` when ``torch.cuda.is_available()`` is true,
            otherwise ``"cpu"``.
        models_loaded: ``True`` once :meth:`load_models` has succeeded.
        transformers_available: Whether ``transformers`` could be imported.
        datasets_available: Whether ``datasets`` could be imported.
        models: Mapping with ``'processor'`` and ``'model'`` entries after a
            successful :meth:`load_models`; empty before that.
    """

    def __init__(self):
        """Pick the compute device and probe the optional dependencies."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.models_loaded = False
        self.transformers_available = False
        self.datasets_available = False
        self.models = {}
        logger.info(f"Advanced TTS Client initialized on device: {self.device}")
        # Check for required dependencies
        self._check_dependencies()

    @staticmethod
    def _probe_dependency(module_name: str, label: str) -> bool:
        """Try to import ``module_name`` and log the outcome under ``label``.

        Returns:
            ``True`` if the import succeeded, ``False`` on ``ImportError``.
        """
        import importlib

        try:
            importlib.import_module(module_name)
        except ImportError:
            logger.warning(f"WARNING: {label} library not available")
            return False
        logger.info(f"SUCCESS: {label} library available")
        return True

    def _check_dependencies(self):
        """Check if required dependencies are available.

        Updates ``transformers_available`` and ``datasets_available`` and
        logs a summary line for each flag.
        """
        self.transformers_available = self._probe_dependency("transformers", "Transformers")
        self.datasets_available = self._probe_dependency("datasets", "Datasets")
        logger.info(f"Transformers available: {self.transformers_available}")
        logger.info(f"Datasets available: {self.datasets_available}")

    async def load_models(self) -> bool:
        """Load advanced TTS models if dependencies are available.

        Returns:
            ``True`` when the processor and model were loaded and stored in
            ``self.models``; ``False`` when a dependency is missing or the
            load raised (the error is logged, not propagated).
        """
        if not self.transformers_available:
            logger.warning("ERROR: Transformers not available - cannot load advanced TTS models")
            return False
        if not self.datasets_available:
            logger.warning("ERROR: Datasets not available - cannot load advanced TTS models")
            return False

        try:
            logger.info("[PROCESS] Loading advanced TTS models...")
            # Import here to avoid import errors if not available
            from transformers import AutoProcessor, AutoModel

            # Load SpeechT5 TTS model
            # NOTE(review): these from_pretrained calls are synchronous, so
            # they run on the caller's event-loop thread until they return.
            logger.info("Loading SpeechT5 TTS model...")
            processor = AutoProcessor.from_pretrained("microsoft/speecht5_tts")
            model = AutoModel.from_pretrained("microsoft/speecht5_tts")

            self.models = {
                'processor': processor,
                'model': model,
            }
            self.models_loaded = True
            logger.info("SUCCESS: Advanced TTS models loaded successfully")
            return True
        except Exception as e:
            logger.error(f"ERROR: Failed to load advanced TTS models: {e}")
            return False

    async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
        """Generate speech from text using advanced TTS.

        The current implementation does not run the loaded models: it writes
        a 440 Hz sine tone whose length is proportional to ``len(text)`` as a
        placeholder.

        Args:
            text: Text to synthesize.
            voice_id: Accepted for interface compatibility; currently unused.

        Returns:
            Path of a newly created temporary ``.wav`` file. The file is not
            deleted by this method; the caller owns it.

        Raises:
            RuntimeError: If the models are not loaded and loading fails.
            Exception: Any error raised while generating or writing the
                audio is logged and re-raised unchanged.
        """
        if not self.models_loaded:
            logger.warning("WARNING: Advanced TTS models not loaded, attempting to load...")
            success = await self.load_models()
            if not success:
                raise RuntimeError("Advanced TTS models not available")

        output_path = None
        try:
            logger.info(f"Generating speech: {text[:50]}...")
            # For now, create a simple placeholder audio file
            # In production, this would use the loaded models
            import tempfile
            import numpy as np
            import soundfile as sf

            # Generate a simple tone as placeholder
            sample_rate = 16000
            duration = len(text) * 0.1  # Rough estimate
            t = np.linspace(0, duration, int(sample_rate * duration), False)
            audio = np.sin(440 * 2 * np.pi * t) * 0.3  # Simple sine wave

            # Reserve a unique path, then close our handle BEFORE writing:
            # reopening a NamedTemporaryFile by name while it is still open
            # is not portable (it fails on Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
                output_path = temp_file.name
            sf.write(output_path, audio, sample_rate)

            logger.info(f"SUCCESS: Advanced TTS audio generated: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"ERROR: Advanced TTS generation failed: {e}")
            if output_path is not None:
                # Best-effort cleanup so a failed write does not leave an
                # orphaned temp file behind; the original error still wins.
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            raise

    async def get_available_voices(self) -> Dict[str, str]:
        """Get available voice configurations.

        Returns:
            A new dict mapping voice ID to a human-readable description.
        """
        return {
            "21m00Tcm4TlvDq8ikWAM": "Female (Neural)",
            "pNInz6obpgDQGcFmaJgB": "Male (Neural)",
            "EXAVITQu4vr4xnSDxMaL": "Female (Expressive)",
            "ErXwobaYiN019PkySvjV": "Male (Professional)",
            # NOTE(review): this ID is 16 characters while every other ID
            # here is 20 - possibly truncated; verify before relying on it.
            "TxGEqnHWrfGW9XjX": "Male (Deep Neural)",
            "yoZ06aMxZJJ28mfd3POQ": "Unisex (Friendly)",
            "AZnzlk1XvdvUeBnXmlld": "Female (Strong)",
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Get model information and status.

        Returns:
            A dict with the dependency flags, the selected device, two
            derived availability flags (``vits_available`` mirrors
            ``transformers_available``; ``speecht5_available`` requires both
            dependencies) and a ``status`` string that is
            ``"Advanced TTS Ready"`` once models are loaded and
            ``"Fallback Mode"`` otherwise.
        """
        return {
            "models_loaded": self.models_loaded,
            "transformers_available": self.transformers_available,
            "datasets_available": self.datasets_available,
            "device": self.device,
            "vits_available": self.transformers_available,
            "speecht5_available": self.transformers_available and self.datasets_available,
            "status": "Advanced TTS Ready" if self.models_loaded else "Fallback Mode",
        }
# Export for backwards compatibility.
# Limits what `from <this module> import *` exposes to the client class.
__all__ = ['AdvancedTTSClient']