import os
import uuid
import asyncio
import subprocess
import json
from zipfile import ZipFile
import stat

import gradio as gr
import ffmpeg
import cv2
import torch
from googletrans import Translator
from huggingface_hub import HfApi
import moviepy.editor as mp
import spaces
from TTS.api import TTS

os.environ["COQUI_TOS_AGREED"] = "1"

# Constants and initialization
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "artificialguybr/video-dubbing"
MAX_VIDEO_DURATION = 60  # seconds

api = HfApi(token=HF_TOKEN)

# Unpack the bundled ffmpeg binary and make it executable
ZipFile("ffmpeg.zip").extractall()
st = os.stat('ffmpeg')
os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)

# Mapping from display names to language codes
language_mapping = {
    'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de',
    'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr',
    'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar',
    'Chinese (Simplified)': 'zh-CN', 'Japanese': 'ja', 'Korean': 'ko',
    'Hindi': 'hi', 'Swedish': 'sv', 'Danish': 'da', 'Finnish': 'fi',
    'Greek': 'el'
}

print("Starting the program...")


def generate_unique_filename(extension):
    """Generate a unique file name with the given extension."""
    return f"{uuid.uuid4()}{extension}"


def cleanup_files(*files):
    """Remove the given files if they exist."""
    for file in files:
        if file and os.path.exists(file):
            os.remove(file)
            print(f"Removed file: {file}")


@spaces.GPU(duration=90)
def transcribe_audio(file_path):
    """Transcribe the audio file to text."""
    # ... (unchanged from the previous revision)


def voice_clone_from_video(video_path, output_speaker_file=None):
    """Extract a reference speaker clip from the video for voice cloning."""
    # ... (unchanged from the previous revision)


async def text_to_speech(text, language, output_file):
    # Initialize TTS (move the model to GPU when one is available)
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(
        "cuda" if torch.cuda.is_available() else "cpu"
    )
    # Generate speech with XTTS
    tts.tts_to_file(
        text=text,
        speaker="random",  # a specific speaker can be named here if one is available
        language=language,
        file_path=output_file,
    )


@spaces.GPU
def process_video(video, target_language, use_wav2lip):
    # ... (the main changes are inside this function)
    try:
        if target_language is None:
            raise ValueError("Please select a Target Language for Dubbing.")

        run_uuid = uuid.uuid4().hex[:6]  # unique identifier for this run
        output_filename = f"{run_uuid}_resized_video.mp4"
        # Resize the video to 720p
        ffmpeg.input(video).output(output_filename, vf='scale=-2:720').run()

        video_path = output_filename
        if not os.path.exists(video_path):
            raise FileNotFoundError(f"Error: {video_path} does not exist.")

        # Probe the video for metadata
        video_info = ffmpeg.probe(video_path)
        video_duration = float(video_info['streams'][0]['duration'])
        # Enforce the duration limit
        if video_duration > MAX_VIDEO_DURATION:
            cleanup_files(video_path)
            raise ValueError(f"Video duration exceeds {MAX_VIDEO_DURATION} seconds. Please upload a shorter video.")

        # Extract the audio track from the video
        ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
        # Band-pass the audio (100 Hz–3 kHz) to clean it up before transcription
        subprocess.run(f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav", shell=True, check=True)

        # Transcribe the audio
        whisper_text = transcribe_audio(f"{run_uuid}_output_audio_final.wav")
        print(f"Transcription successful: {whisper_text}")

        # Translate the transcript into the target language
        target_language_code = language_mapping[target_language]
        translator = Translator()
        translated_text = translator.translate(whisper_text, dest=target_language_code).text
        print(f"Translated text: {translated_text}")

        # Synthesize the translated speech
        asyncio.run(text_to_speech(translated_text, target_language_code, f"{run_uuid}_output_synth.wav"))

        if use_wav2lip:
            try:
                subprocess.run(f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face '{video_path}' --audio '{run_uuid}_output_synth.wav' --pads 0 15 0 0 --resize_factor 1 --nosmooth --outfile '{run_uuid}_output_video.mp4'", shell=True, check=True)
            except subprocess.CalledProcessError as e:
                print(f"Wav2Lip error: {str(e)}")
                gr.Warning("Wav2Lip encountered an error. Falling back to simple audio replacement.")
                subprocess.run(f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4", shell=True, check=True)
        else:
            # Simply replace the audio track when Wav2Lip is not used
            subprocess.run(f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4", shell=True, check=True)

        output_video_path = f"{run_uuid}_output_video.mp4"
        if not os.path.exists(output_video_path):
            raise FileNotFoundError(f"Error: {output_video_path} was not generated.")

        # Remove temporary files
        cleanup_files(
            f"{run_uuid}_resized_video.mp4",
            f"{run_uuid}_output_audio.wav",
            f"{run_uuid}_output_audio_final.wav",
            f"{run_uuid}_output_synth.wav"
        )
        return output_video_path, ""
    except Exception as e:
        print(f"Error in process_video: {str(e)}")
        return None, f"Error: {str(e)}"


# Gradio interface setup
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Video Dubbing")
    gr.Markdown("This tool uses AI to dub videos into different languages. Upload a video, choose a target language, and get a dubbed version!")

    with gr.Row():
        with gr.Column(scale=2):
            # Video upload field
            video_input = gr.Video(label="Upload Video")
            # Dropdown for choosing the dubbing target language
            target_language = gr.Dropdown(
                choices=list(language_mapping.keys()),
                label="Target Language for Dubbing",
                value="Spanish"
            )
            # Checkbox for enabling Wav2Lip lip sync
            use_wav2lip = gr.Checkbox(
                label="Use Wav2Lip for lip sync",
                value=False,
                info="Enable this if the video has close-up faces. May not work for all videos."
            )
            # Button that starts video processing
            submit_button = gr.Button("Process Video", variant="primary")
        with gr.Column(scale=2):
            # Output field for the processed video
            output_video = gr.Video(label="Processed Video")
            # Field for status and error messages
            error_message = gr.Textbox(label="Status/Error Message")

    # Wire the button to the processing function
    submit_button.click(
        process_video,
        inputs=[video_input, target_language, use_wav2lip],
        outputs=[output_video, error_message]
    )

    gr.Markdown("""
    ## Notes:
    - Video limit is 1 minute. The tool will dub all speakers using a single voice.
    - Processing may take up to 5 minutes.
    - This is an alpha version using open-source models.
    - A quality-vs-speed trade-off was made for scalability and hardware limitations.
    - For videos longer than 1 minute, please duplicate this Space and adjust the limit in the code.
    """)
    gr.Markdown("""
    ---
    Developed by [@artificialguybr](https://twitter.com/artificialguybr) using open-source tools.
    Special thanks to Hugging Face for GPU support and [@yeswondwer](https://twitter.com/@yeswondwerr) for the original code.
    Try our [Video Transcription and Translation](https://huggingface.co/spaces/artificialguybr/VIDEO-TRANSLATION-TRANSCRIPTION) tool!
    """)

print("Launching Gradio interface...")
demo.queue()
demo.launch()
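
# --- Illustrative sketches (not part of the app flow above) ---

# The body of transcribe_audio is elided in this file ("remains unchanged"). As an
# illustration only, here is a minimal sketch of what such a function can look like
# with the openai-whisper package. This is an assumption: the Space may use a
# different ASR backend, and the _sketch name and model size are ours.
def transcribe_audio_sketch(file_path):
    import whisper  # lazy import so this file loads without the package installed
    model = whisper.load_model("base")  # placeholder model choice
    return model.transcribe(file_path)["text"]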
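
# process_video reads the clip duration from ffmpeg.probe() and rejects clips longer
# than MAX_VIDEO_DURATION. The check can be isolated into a tiny helper that is easy
# to unit-test against a probe-shaped dict (the helper name is ours, not part of the
# original Space):
def clip_is_too_long(probe_info, max_seconds=60):
    """Return True when the first stream's duration exceeds max_seconds."""
    return float(probe_info["streams"][0]["duration"]) > max_seconds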
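
# The extracted audio is band-passed (100 Hz-3 kHz) via a shell command string before
# transcription. Building the ffmpeg invocation as an argument list instead would
# avoid shell-quoting problems with unusual file names; a sketch (the helper name is
# ours), usable as subprocess.run(bandpass_cmd(src, dst), check=True):
def bandpass_cmd(src, dst, low_hz=100, high_hz=3000):
    """Build an ffmpeg argument list that band-passes src into dst."""
    return ["ffmpeg", "-y", "-i", src,
            "-af", f"lowpass={high_hz},highpass={low_hz}", dst]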