File size: 2,650 Bytes
05d3cbf
 
7b6fb4e
05d3cbf
 
4b73d5c
c361e08
7b6fb4e
4b73d5c
 
05d3cbf
 
bb6e368
0aa161b
6459222
4b73d5c
0aa161b
4b73d5c
05d3cbf
4b73d5c
 
05d3cbf
bb6e368
 
 
 
b3d0e64
 
 
 
 
 
 
161fa06
bb6e368
b3d0e64
a5c316e
4caa659
55866b4
05d3cbf
 
b43a3bd
b3d0e64
05d3cbf
 
4caa659
05d3cbf
 
bb6e368
4caa659
b43a3bd
05d3cbf
 
161fa06
b43a3bd
10aef09
7ea5716
05d3cbf
fe4d261
391069f
23bdb5a
ae6fcec
161fa06
ae6fcec
 
 
3b2841b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
import os
import subprocess
from gtts import gTTS
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
from PIL import Image

# Text-generation pipeline and tokenizer loaded from the local "checkpoints"
# directory (presumably a fine-tuned Spanish GPT-2 — TODO confirm; the same
# directory also holds the Wav2Lip weights used by inference.py below).
generator = pipeline('text-generation', model="checkpoints")
tokenizer = GPT2Tokenizer.from_pretrained('checkpoints')
# Explicitly enable HF tokenizers parallelism (also silences the fork warning
# that tokenizers emits when subprocess is used after tokenization).
os.environ["TOKENIZERS_PARALLELISM"] = "true"

def generate_output(name, date_of_birth, progress=gr.Progress()):
    """Generate a lip-synced oracle video for the given user.

    Pipeline: (1) generate Spanish text with the module-level GPT-2
    ``generator``, (2) synthesize speech with gTTS and convert it to WAV,
    (3) run the external Wav2Lip ``inference.py`` script to lip-sync the
    audio onto a static face image.

    Args:
        name: User's name, interpolated into the generation prompt.
        date_of_birth: Currently unused; kept for interface compatibility
            with the Gradio form.
        progress: Gradio progress tracker (Gradio injects it; the default
            instance follows Gradio's documented convention).

    Returns:
        A ``(video_path, error_message)`` pair; exactly one element is
        non-None.
    """
    prompt = f"Bienvenido {name}:"
    input_tokens = tokenizer.encode(prompt, add_special_tokens=True)
    input_text = tokenizer.decode(input_tokens)
    gpt2_output = generator(input_text, max_length=60, do_sample=True, temperature=0.6)
    # Validate the pipeline output BEFORE indexing into it. The original code
    # indexed gpt2_output[0]['generated_text'] first, which made this check
    # unreachable: any malformed output had already raised.
    if len(gpt2_output) == 0 or 'generated_text' not in gpt2_output[0]:
        return None, "No se pudo generar el texto."
    generated_text = gpt2_output[0]['generated_text']
    # Strip the echoed prompt so only the continuation is spoken.
    generated_text = generated_text.replace(input_text, "").strip()

    def _progress(generated, to_generate):
        progress((generated, to_generate))
    _progress(0, 1)

    temp_audio_path = "temp_audio.mp3"
    try:
        tts = gTTS(generated_text, lang='es')
        tts.save(temp_audio_path)
        audio_path = "audio.wav"
        # Wav2Lip expects WAV input, so transcode the gTTS MP3.
        audio = AudioSegment.from_mp3(temp_audio_path)
        audio.export(audio_path, format="wav")
        print("Archivo de audio generado:", audio_path)
        _progress(1, 2)
    except Exception as e:
        return None, f"No se pudo generar el audio: {str(e)}"
    finally:
        # Always remove the intermediate MP3. The original only deleted it on
        # the success path and leaked it when video generation failed.
        if os.path.isfile(temp_audio_path):
            os.remove(temp_audio_path)

    # Pass argv as a list with shell=False (default): no shell interpolation,
    # same fixed command as before.
    command = [
        "python3", "inference.py",
        "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
        "--face", "oraculo.jpg",
        "--audio", "audio.wav",
        "--outfile", "video.mp4",
        "--nosmooth",
    ]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        error_message = process.stderr.decode("utf-8")
        return None, f"No se pudo generar el video: {error_message}"

    output_video_path = "video.mp4"
    if os.path.isfile(output_video_path):
        _progress(2, 2)
        return output_video_path, None
    return None, "No se pudo generar el video"

# Gradio UI wiring: two text inputs feed generate_output, one video output.
# NOTE(review): this uses the Gradio 3.x `gr.inputs`/`gr.outputs` namespaces
# and `.style(...)`, which were removed in Gradio 4 — pin gradio<4 or migrate
# to gr.Textbox / gr.Video components.
iface = gr.Interface(
    fn=generate_output,
    inputs=[
        gr.inputs.Textbox(lines=1, label="Nombre", placeholder="Ingresa tu nombre"),
        gr.inputs.Textbox(lines=1, label="Fecha de Nacimiento", placeholder="DD/MM/AAAA")
    ],
    outputs=[
        # NOTE(review): generate_output returns a (video, error) pair but only
        # one output component is declared; the error textbox is commented out.
        # Confirm the installed Gradio version tolerates the extra return value,
        # or restore the second output.
        gr.outputs.Video(label="Respuesta de Andrea (un minuto aproximadamente)").style(width=256),
        #gr.outputs.Textbox(label="Mensaje de error", type="text")
    ],
    title="Oráculo de Inteligencia Artificial v2.1",
    description="Por favor, ingresa tu nombre y fecha de nacimiento."
)

# Launch the app (blocking call; serves on the default host/port).
iface.launch()