# -*- coding: utf-8 -*-
"""TTS demo with adjustable voice parameters.

Exported from a Colab notebook:
https://colab.research.google.com/drive/1qFHOV3WJLOgPWOAj4v864Wy3ZKZsut35

Dependencies belong in requirements.txt, not in-script pip installs:
torch, transformers==4.30.2, gradio, TTS, numpy, soundfile, matplotlib.
"""

import torch  # noqa: F401 -- kept so TTS can detect GPU support; confirm it is actually needed
from TTS.api import TTS
import gradio as gr
import soundfile as sf
import matplotlib.pyplot as plt
import numpy as np

# Load one specific pre-trained Text-to-Speech model for the whole app.
# (Interactively, TTS().list_models() enumerates the other available models.)
model_name = "tts_models/en/ljspeech/tacotron2-DDC"
tts = TTS(model_name)
import tempfile

# Sample rate used when writing and plotting audio.
# NOTE(review): the original code hard-coded 24000 in two places; LJSpeech
# models typically output 22050 Hz -- confirm against
# tts.synthesizer.output_sample_rate before relying on this value.
SAMPLE_RATE = 24000


def plot_waveform(wav, sample_rate=SAMPLE_RATE, plot_path=None, color=None):
    """Plot *wav* amplitude against time and save the figure as a PNG.

    Args:
        wav: 1-D sequence of audio samples.
        sample_rate: samples per second; scales the time axis.
        plot_path: destination PNG path. When omitted a temporary file is
            created (the original hard-coded the Colab-only path
            "/content/waveform.png", which breaks outside Colab).
        color: optional matplotlib line color.

    Returns:
        Path of the saved PNG image.
    """
    if plot_path is None:
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            plot_path = tmp.name
    plt.figure(figsize=(10, 2))
    plt.plot(np.linspace(0, len(wav) / sample_rate, num=len(wav)), wav, color=color)
    plt.title("Waveform")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.tight_layout()
    plt.savefig(plot_path)
    plt.close()  # free the figure's memory; figures accumulate otherwise
    return plot_path


def synthesize_speech_gr(text, speed=1.0, pitch=1.0, volume=1.0):
    """Gradio handler: synthesize *text* and return (audio_path, image_path).

    Args:
        text: text to synthesize.
        speed, pitch: accepted for UI compatibility; tacotron2-DDC exposes
            no direct speed/pitch control, so they are currently ignored
            (TODO: implement via post-processing, e.g. resampling).
        volume: linear gain applied to the samples. Bug fix: the original
            accepted this slider but never used it.

    Returns:
        Tuple of (wav file path, waveform PNG path) on success, or
        (None, None) on any synthesis/IO failure.
    """
    try:
        wav = np.asarray(tts.tts(text), dtype=np.float32)
        wav = wav * volume  # apply the previously-ignored volume slider
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio:
            output_path = tmp_audio.name
        sf.write(output_path, wav, SAMPLE_RATE)
        # Reuse the shared helper instead of duplicating its plotting logic.
        waveform_path = plot_waveform(wav, SAMPLE_RATE, color="orange")
        return output_path, waveform_path
    except Exception as e:  # broad on purpose: report any failure to the UI
        print(f"❌ Gradio Error: {e}")
        return None, None
def tts_gradio(text):
    """Minimal TTS handler: synthesize *text* into "output.wav".

    NOTE(review): redundant with synthesize_speech_gr and not wired to the
    Gradio interface below; kept only for backward compatibility -- consider
    deleting it. It writes at 22050 Hz while the rest of this file uses
    24000 Hz -- verify which rate matches the model's actual output.

    Returns:
        "output.wav" on success, None on failure.
    """
    try:
        wav = tts.tts(text)
        sf.write("output.wav", wav, 22050)
        return "output.wav"
    except Exception as e:
        print("❌ Error in TTS generation:", e)
        return None


# Gradio web UI: text plus speed/pitch/volume sliders in; synthesized audio
# and a waveform image out.
iface = gr.Interface(
    fn=synthesize_speech_gr,
    inputs=[
        gr.Textbox(lines=3, label="Enter Text"),
        gr.Slider(0.5, 2.0, value=1.0, label="Speed"),
        gr.Slider(0.5, 2.0, value=1.0, label="Pitch"),
        gr.Slider(0.0, 2.0, value=1.0, label="Volume"),
    ],
    outputs=[
        gr.Audio(type="filepath", label="Generated Speech"),
        gr.Image(label="Waveform"),
    ],
    title="🗣️ Text-to-Speech (TTS) Bot with Adjustable Voice Parameters",
    description="Enter text and Hear the generated voice 😁✌️ | ⭐ [GitHub Repo](https://github.com/hrnrxb/TTS-with-Adjustable-Voice-Parameters) | 🌐 [My Website](https://hrnrxb.github.io)",
)

if __name__ == "__main__":
    # Guard the launch so importing this module (e.g. from tests or another
    # entry point) does not start a server as a side effect.
    iface.launch()