# generate_lipsynced_videos_IITM_API / lip_sync_online_v1_silence_remove.py
"""
Lip syncing code - Online version
"""
import argparse
import os
import sys

import moviepy.editor as mp
from pysrt import SubRipFile, SubRipTime, SubRipItem
def srt_time_to_seconds(time_datetime_format):
    """Convert a datetime.time object (e.g. from pysrt's .to_time()) to seconds."""
    time_iso_format = time_datetime_format.isoformat()  # "HH:MM:SS[.ffffff]"
    seconds = float(time_iso_format.split(':')[-1])
    rest_list = time_iso_format.split(':')[:-1]  # [hours, minutes]
    for i in range(len(rest_list)):
        if i == 0:
            seconds += int(rest_list[i]) * 3600  # hours
        elif i == 1:
            seconds += int(rest_list[i]) * 60    # minutes
        else:
            sys.exit("Error in time conversion")
    return seconds
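# Quick sanity check for the conversion above (illustrative values):
#   SubRipTime(minutes=1, seconds=30, milliseconds=500).to_time().isoformat()
#   gives "00:01:30.500000", so srt_time_to_seconds(...) returns 1*60 + 30.5 = 90.5.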
def lip_sync(video_path, srt_path, audio_list, output_video, output_srt):
    """Rebuild video_path so each SRT segment carries its TTS audio clip, then write output_video and output_srt."""
    print("length of audio list:", len(audio_list))
# output video filename without extension
output_filename = os.path.splitext(output_video)[0]
# read the original video as obj
video_obj = mp.VideoFileClip(video_path)
# read target srt file
srt = SubRipFile.open(srt_path)
# initialize variables
video_clips = []
prev_end_time = SubRipTime(0)
new_srt_start_time = []
new_srt_end_time = []
new_srt_total_duration = 0.0
sil_total_dur = 0
sil_total_dur_effective = 0
aud_total_dur = 0
# iterate through srt entries
for i, srt_entry in enumerate(srt):
print(f"srt_num: {i+1}")
start_time = srt_entry.start.to_time()
end_time = srt_entry.end.to_time()
if srt_time_to_seconds(start_time) > srt_time_to_seconds(prev_end_time.to_time()):
# Create a silent audio clip with the same duration as the video
sil_dur = srt_time_to_seconds(start_time) - srt_time_to_seconds(prev_end_time.to_time())
sil_total_dur += sil_dur
            # Shorten the gap by a fixed scaling factor (this is where the
            # inter-subtitle silences get compressed).
            sil_dur = sil_dur * 0.9194003385185787
            sil_total_dur_effective += sil_dur
            # Loop a local silence.wav to cover the shortened gap.
            silence = mp.afx.audio_loop(mp.AudioFileClip("silence.wav"), duration=sil_dur, nloops=1)
# Set the silent audio track in the video clip
# add inter-srt clip to video_clips
inter_srt_clip = video_obj.subclip(srt_time_to_seconds(prev_end_time.to_time()), srt_time_to_seconds(start_time))
inter_srt_clip = inter_srt_clip.fx(mp.vfx.speedx, factor=inter_srt_clip.duration/sil_dur)
inter_srt_clip_without_audio = inter_srt_clip.without_audio()
inter_srt_clip_with_silence = inter_srt_clip_without_audio.set_audio(silence)
video_clips.append(inter_srt_clip_with_silence)
new_srt_total_duration += inter_srt_clip_with_silence.duration
srt_clip = video_obj.subclip(srt_time_to_seconds(start_time), srt_time_to_seconds(end_time))
srt_clip_without_audio = srt_clip.without_audio()
video_clips.append(srt_clip_without_audio)
# print("i:",i)
# read audio file
audio_clip = mp.AudioFileClip(audio_list[i])
aud_total_dur+=audio_clip.duration
        # time-stretch the video segment so its duration matches the TTS audio
        if srt_clip.duration != audio_clip.duration:
            srt_clip = srt_clip.fx(mp.vfx.speedx, factor=srt_clip.duration/audio_clip.duration)
# replace audio in video clip
video_clip = srt_clip.set_audio(audio_clip)
new_srt_start_time.append(new_srt_total_duration)
new_srt_end_time.append(new_srt_total_duration+video_clip.duration)
new_srt_total_duration += video_clip.duration
# add video clip to list
video_clips[-1] = video_clip
# update previous end time
prev_end_time = srt_entry.end
print("silence total duration",sil_total_dur)
print("silence total duration effective",sil_total_dur_effective)
print("new_srt_total_duration",new_srt_total_duration)
print("aud_total_dur",aud_total_dur)
# check if last srt ends before video ends and add final clip if necessary
last_srt_end_time = srt[-1].end.to_time()
video_duration = video_obj.duration
print("Completed for loop")
    try:
        # compare in seconds; last_srt_end_time is a datetime.time, video_duration a float
        if srt_time_to_seconds(last_srt_end_time) < video_duration:
            final_clip = video_obj.subclip(srt_time_to_seconds(last_srt_end_time), video_duration)
            final_clip_without_audio = final_clip.without_audio()
            # Create a silent audio clip with the same duration as the trailing video
            silence = mp.afx.audio_loop(mp.AudioFileClip("silence.wav"), duration=(video_duration - srt_time_to_seconds(last_srt_end_time)), nloops=1)
            final_clip_with_silence = final_clip_without_audio.set_audio(silence)
            video_clips.append(final_clip_with_silence)
    except Exception as e:
        print("Could not append trailing clip:", e)
    print("Trailing-clip handling completed")
    # concatenate video clips and write the final video
    final_video = mp.concatenate_videoclips(video_clips)
    final_video.write_videofile(output_video, codec="libx264", threads=30, audio_codec='aac',
                                temp_audiofile=output_filename + '.m4a', remove_temp=True, preset="medium",
                                ffmpeg_params=["-profile:v", "baseline", "-level", "3.0", "-pix_fmt", "yuv420p"])
# create new srt file with updated timings
new_srt = SubRipFile()
for i, srt_entry in enumerate(srt):
new_start_time = SubRipTime(seconds=new_srt_start_time[i])
new_end_time = SubRipTime(seconds=new_srt_end_time[i])
new_srt.append(SubRipItem(index=i+1, start=new_start_time, end=new_end_time, text=srt_entry.text))
    # Write the updated SRT file to disk
new_srt.save(output_srt, encoding='utf-8')
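# Illustrative direct call (all filenames are placeholders; audio_list must contain
# one TTS clip per SRT entry, in the same order as the subtitles):
#   lip_sync("talk.mp4", "talk_translated.srt",
#            ["tts/0001.wav", "tts/0002.wav"],
#            "talk_dubbed.mp4", "talk_dubbed.srt")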
def parse_arguments():
parser = argparse.ArgumentParser(description="Lip syncing script with command-line arguments.")
parser.add_argument("--original_video_path", required=True, help="Path to the original video file.")
parser.add_argument("--Translated_SRT", required=True, help="Path to the translated SRT file.")
parser.add_argument("--audio_dir", required=True, help="Path to the directory containing TTS audio files.")
parser.add_argument("--new_video", required=True, help="Path for the output video file.")
parser.add_argument("--new_srt", required=True, help="Path for the output SRT file.")
return parser.parse_args()
if __name__ == "__main__":
args = parse_arguments()
video_path = args.original_video_path
srt_path = args.Translated_SRT
audio_dir = args.audio_dir
output_video = args.new_video
output_srt = args.new_srt
    # TTS clips are matched to SRT entries by sorted filename order, so the files in
    # audio_dir should sort in the same order as the subtitles (e.g. zero-padded indices).
    audio_list = os.listdir(audio_dir)
    audio_list.sort()
    audio_list = [os.path.join(audio_dir, file) for file in audio_list]
print("srt_path:", srt_path)
lip_sync(video_path, srt_path, audio_list, output_video, output_srt)