awt-pic2vid

Sleeping

App Files Files Community

awt-pic2vid / app.py

fffiloni

set queue

daa780f about 1 year ago

raw

history blame

8.09 kB

	import gradio as gr
	import subprocess
	from moviepy.editor import VideoFileClip
	import datetime

	def convert_to_mp4_with_aac(input_path, output_path):
	# Load the video
	video = VideoFileClip(input_path)

	# Set the output format to mp4 with AAC codec
	video.write_videofile(output_path, codec="libx264", audio_codec="aac")

	return output_path


	# Function to check if the audio file path exists in the list
	def check_file_exists(file_path, audio_list):
	return file_path in audio_list

	def load_audio(audio_listed):
	if audio_listed is None:
	return None
	else:
	return f"data/audio/{audio_listed}"

	def execute_command(command: str) -> None:
	subprocess.run(command, check=True)

	def infer(audio_input, image_path, emotional_style):
	# Get the current timestamp
	timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

	output_name = f"lipsynced_result_{timestamp}"

	command = [
	f"python",
	f"inference_for_demo_video.py",
	f"--wav_path={audio_input}",
	f"--style_clip_path=data/style_clip/3DMM/{emotional_style}",
	f"--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
	f"--image_path={image_path}",
	f"--cfg_scale=1.0",
	f"--max_gen_len=30",
	f"--output_name={output_name}"
	]

	execute_command(command)

	# Convert video to compatible codecs
	input_file = f"output_video/{output_name}.mp4"
	output_file = f"{output_name}.mp4"

	result = convert_to_mp4_with_aac(input_file, output_file)

	return result

	css="""
	#col-container{
	margin: 0 auto;
	max-width: 940px;
	}
	#project-links{
	margin: 0 0 12px !important;
	column-gap: 8px;
	display: flex;
	justify-content: center;
	flex-wrap: nowrap;
	flex-direction: row;
	align-items: center;
	}
	#run-btn{
	border: var(--button-border-width) solid var(--button-primary-border-color);
	background: var(--button-primary-background-fill);
	color: var(--button-primary-text-color);
	}
	#run-btn:hover{
	border-color: var(--button-primary-border-color-hover);
	background: var(--button-primary-background-fill-hover);
	color: var(--button-primary-text-color-hover);
	}
	"""
	with gr.Blocks(css=css) as demo:
	with gr.Column(elem_id="col-container"):
	gr.HTML("""
	<h2 style="text-align: center;">DreamTalk</h2>
	<p style="text-align: center;">When Expressive Talking Head Generation Meets Diffusion Probabilistic Models</p>
	<p style="margin:12px auto;display: flex;justify-content: center;">
	<a href="https://huggingface.co/spaces/fffiloni/dreamtalk?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
	</p>

	""")
	with gr.Row():
	with gr.Column():
	image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
	audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"], value="data/audio/acknowledgement_english.m4a")
	with gr.Row():
	audio_list = gr.Dropdown(
	label="Choose an audio (optional)",
	choices=[
	"German1.wav", "German2.wav", "German3.wav", "German4.wav",
	"acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
	"chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
	"french1.wav", "french2.wav", "french3.wav",
	"italian1.wav", "italian2.wav", "italian3.wav",
	"japan1.wav", "japan2.wav", "japan3.wav",
	"korean1.wav", "korean2.wav", "korean3.wav",
	"noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav", "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav", "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav", "out_of_domain_narrative.wav",
	"spanish1.wav", "spanish2.wav", "spanish3.wav"
	],
	value = "acknowledgement_english.m4a"
	)
	audio_list.change(
	fn = load_audio,
	inputs = [audio_list],
	outputs = [audio_input]
	)
	emotional_style = gr.Dropdown(
	label = "emotional style",
	choices = [
	"M030_front_angry_level3_001.mat",
	"M030_front_contempt_level3_001.mat",
	"M030_front_disgusted_level3_001.mat",
	"M030_front_fear_level3_001.mat",
	"M030_front_happy_level3_001.mat",
	"M030_front_neutral_level1_001.mat",
	"M030_front_sad_level3_001.mat",
	"M030_front_surprised_level3_001.mat",
	"W009_front_angry_level3_001.mat",
	"W009_front_contempt_level3_001.mat",
	"W009_front_disgusted_level3_001.mat",
	"W009_front_fear_level3_001.mat",
	"W009_front_happy_level3_001.mat",
	"W009_front_neutral_level1_001.mat",
	"W009_front_sad_level3_001.mat",
	"W009_front_surprised_level3_001.mat",
	"W011_front_angry_level3_001.mat",
	"W011_front_contempt_level3_001.mat",
	"W011_front_disgusted_level3_001.mat",
	"W011_front_fear_level3_001.mat",
	"W011_front_happy_level3_001.mat",
	"W011_front_neutral_level1_001.mat",
	"W011_front_sad_level3_001.mat",
	"W011_front_surprised_level3_001.mat"
	],
	value = "M030_front_neutral_level1_001.mat"
	)
	gr.Examples(
	examples = [
	"data/src_img/uncropped/face3.png",
	"data/src_img/uncropped/male_face.png",
	"data/src_img/uncropped/uncut_src_img.jpg",
	"data/src_img/cropped/chpa5.png",
	"data/src_img/cropped/cut_img.png",
	"data/src_img/cropped/f30.png",
	"data/src_img/cropped/menglu2.png",
	"data/src_img/cropped/nscu2.png",
	"data/src_img/cropped/zp1.png",
	"data/src_img/cropped/zt12.png"
	],
	inputs=[image_path],
	examples_per_page=5
	)
	with gr.Row():
	gr.ClearButton([audio_input, image_path, audio_list])
	run_btn = gr.Button("Run", elem_id="run-btn")
	with gr.Column():
	output_video = gr.Video(format="mp4")
	gr.HTML("""
	<p id="project-links" align="center">
	<a href='https://dreamtalk-project.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://arxiv.org/abs/2312.09767'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> <a href='https://youtu.be/VF4vlE6ZqWQ'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
	</p>
	<img src="https://github.com/ali-vilab/dreamtalk/raw/main/media/teaser.gif" style="margin: 0 auto;border-radius: 10px;" />
	""")

	run_btn.click(
	fn = infer,
	inputs = [audio_input, image_path, emotional_style],
	outputs = [output_video]
	)

	demo.queue(max_size=20).launch()