import gradio as gr
import os
import subprocess
from gtts import gTTS
from pydub import AudioSegment
from transformers import GPT2Tokenizer, pipeline
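# Text-generation pipeline and tokenizer, loaded from the local "checkpoints"
# directory (assumed to hold a Spanish GPT-2 fine-tune; wav2lip_gan.pth lives
# there as well, per the inference command below).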
generator = pipeline('text-generation', model="checkpoints")
tokenizer = GPT2Tokenizer.from_pretrained('checkpoints')
os.environ["TOKENIZERS_PARALLELISM"] = "true"
def generate_output(name, date_of_birth, progress=gr.Progress()):
    # Note: `date_of_birth` is collected by the UI but not used in generation.
    def _progress(generated, to_generate):
        progress((generated, to_generate))

    # Step 1: generate a short Spanish greeting with the GPT-2 pipeline.
    prompt = f"Bienvenido {name}:"
    input_tokens = tokenizer.encode(prompt, add_special_tokens=True)
    input_text = tokenizer.decode(input_tokens)
    gpt2_output = generator(input_text, max_length=60, do_sample=True, temperature=0.6)
    # Validate the pipeline output before indexing into it.
    if len(gpt2_output) == 0 or 'generated_text' not in gpt2_output[0]:
        return None, "No se pudo generar el texto."
    # Keep only the newly generated continuation, without the echoed prompt.
    generated_text = gpt2_output[0]['generated_text'].replace(input_text, "").strip()
    _progress(1, 3)
    # Step 2: synthesize the text with gTTS and convert the MP3 to WAV,
    # which is the format expected by the Wav2Lip inference script.
    try:
        tts = gTTS(generated_text, lang='es')
        temp_audio_path = "temp_audio.mp3"
        tts.save(temp_audio_path)
        audio_path = "audio.wav"
        audio = AudioSegment.from_mp3(temp_audio_path)
        audio.export(audio_path, format="wav")
        print("Archivo de audio generado:", audio_path)
        _progress(2, 3)
    except Exception as e:
        return None, f"No se pudo generar el audio: {str(e)}"
    # Step 3: run Wav2Lip inference to lip-sync the still image to the audio.
    command = (
        "python3 inference.py --checkpoint_path checkpoints/wav2lip_gan.pth "
        "--face oraculo.jpg --audio audio.wav --outfile video.mp4 --nosmooth"
    )
    process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        error_message = process.stderr.decode("utf-8")
        return None, f"No se pudo generar el video: {error_message}"
    output_video_path = "video.mp4"
    os.remove(temp_audio_path)  # clean up the intermediate MP3
    if os.path.isfile(output_video_path):
        _progress(3, 3)
        return output_video_path, None
    return None, "No se pudo generar el video"
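# Gradio UI: two text inputs (name and birth date) mapped to generate_output,
# which returns the generated video plus an optional error message.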
iface = gr.Interface(
    fn=generate_output,
    inputs=[
        gr.Textbox(lines=1, label="Nombre", placeholder="Ingresa tu nombre"),
        gr.Textbox(lines=1, label="Fecha de Nacimiento", placeholder="DD/MM/AAAA"),
    ],
    outputs=[
        gr.Video(label="Respuesta de Andrea (un minuto aproximadamente)", width=256),
        gr.Textbox(label="Mensaje de error"),
    ],
    title="Oráculo de Inteligencia Artificial v2.1",
    description="Por favor, ingresa tu nombre y fecha de nacimiento.",
)
iface.launch()
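# Example (hypothetical) programmatic call with gradio_client, assuming the app
# is running locally on Gradio's default port; "/predict" is the default
# endpoint name that gr.Interface assigns:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   video_path, error = client.predict("Ana", "01/01/1990", api_name="/predict")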