import os

import assemblyai as aai
import gradio as gr
import requests
from cerebras.cloud.sdk import Cerebras
from gtts import gTTS
from moviepy import (
    AudioFileClip,
    CompositeVideoClip,
    TextClip,
    VideoFileClip,
    concatenate_videoclips,
)

# Initialize the Cerebras client
client = Cerebras(api_key=os.getenv("Ckey"))

# Pexels API key
PEXELS_API_KEY = os.getenv("Pkey")

# AssemblyAI API key
aai.settings.api_key = os.getenv("Akey")


def generate_script(prompt, max_duration):
    """Generate a concise narration script sized to the requested video duration."""
    system_message = (
        "You are an expert video content creator and narration writer. Craft a "
        "concise, poetic narration that aligns with the user's prompt. Go straight "
        "to the narration; do not write a foreword or a description of your "
        f"actions. The narration must be readable aloud in under {max_duration} "
        "seconds."
    )
    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ],
        model="llama-3.3-70b",
        stream=False,
        max_completion_tokens=1024,
        temperature=0.7,
        top_p=1,
    )
    return response.choices[0].message.content


def search_and_download_videos(query, max_duration, aspect_ratio, download_folder, max_results=6):
    """Search Pexels for clips matching the query and download those that fit."""
    url = "https://api.pexels.com/videos/search"
    headers = {"Authorization": PEXELS_API_KEY}
    params = {"query": query, "per_page": max_results}
    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        videos = response.json().get("videos", [])

        os.makedirs(download_folder, exist_ok=True)

        downloaded_files = []
        for video in videos:
            duration = video.get("duration")
            width = video.get("width")
            height = video.get("height")
            if not (duration and width and height):
                continue
            if width > height:
                video_aspect_ratio = "landscape"
            elif height > width:
                video_aspect_ratio = "portrait"
            else:
                video_aspect_ratio = "square"
            if duration <= max_duration and video_aspect_ratio == aspect_ratio:
                video_url = video["video_files"][0]["link"]
                video_filename = os.path.join(download_folder, f"{video['id']}.mp4")
                video_response = requests.get(video_url, stream=True)
                video_response.raise_for_status()
                with open(video_filename, "wb") as file:
                    for chunk in video_response.iter_content(chunk_size=1024):
                        file.write(chunk)
                downloaded_files.append(video_filename)
        return downloaded_files
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return []


def generate_narration(script, output_file="narration.mp3"):
    """Convert the script to speech with gTTS and save it as an MP3."""
    tts = gTTS(script, lang="en")
    tts.save(output_file)
    return output_file


def load_videos_from_folder(folder_path):
    """Return the paths of all video files in a folder."""
    if not os.path.exists(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return []
    return [
        os.path.join(folder_path, file)
        for file in os.listdir(folder_path)
        if file.endswith((".mp4", ".mov", ".avi", ".mkv"))
    ]


def aggregate_videos(clips):
    """Concatenate the downloaded clips into a single video."""
    if not clips:
        return None
    return concatenate_videoclips(clips, method="compose")


def trim_video_to_audio_length(final_video, audio_length):
    """Trim the video so it is no longer than the narration audio."""
    if final_video.duration > audio_length:
        final_video = final_video.subclipped(0, audio_length)
    return final_video


def add_narration_to_video(final_video, narration_path):
    """Attach the narration track, matching its duration to the video."""
    if os.path.exists(narration_path):
        narration_audio = AudioFileClip(narration_path).with_duration(final_video.duration)
        final_video = final_video.with_audio(narration_audio)
    return final_video
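# ---------------------------------------------------------------------------
# Illustrative usage (kept commented so it never runs on import): the helpers
# above can be smoke-tested in isolation before exercising the full pipeline.
# The query, duration, folder, and file names below are arbitrary examples,
# and the calls assume the API keys above are set in the environment.
#
#   clips = search_and_download_videos(
#       "ocean waves", max_duration=10, aspect_ratio="landscape",
#       download_folder="downloaded_videos",
#   )
#   print(f"Downloaded {len(clips)} clip(s)")
#   narration = generate_narration("A short test sentence.", "test_narration.mp3")
# ---------------------------------------------------------------------------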
def save_final_video(final_video, output_path):
    """Write the final video to disk."""
    final_video.write_videofile(output_path, codec="libx264", audio_codec="aac", preset="ultrafast")


def split_text_into_lines(data):
    """Group word-level timestamps into subtitle lines.

    A new line starts when the current line would exceed MAX_CHARS characters
    or already exceeds MAX_DURATION seconds, or when there is a pause longer
    than MAX_GAP seconds before the next word.
    """
    MAX_CHARS = 40
    MAX_DURATION = 2.5
    MAX_GAP = 1.5

    subtitles = []
    line = []
    line_duration = 0
    line_chars = 0

    for idx, wd in enumerate(data):
        # Flush the current line if it has too many characters or runs too long.
        # (The `line` check guards against an IndexError on an oversized first word.)
        if line and ((line_chars + len(wd["word"]) > MAX_CHARS) or (line_duration > MAX_DURATION)):
            subtitles.append({
                "word": " ".join(w["word"] for w in line),
                "start": line[0]["start"],
                "end": line[-1]["end"],
                "textcontents": line,
            })
            line = []
            line_chars = 0
            line_duration = 0

        line.append(wd)
        line_chars += len(wd["word"])
        line_duration = wd["end"] - line[0]["start"]

        # Also flush on a long pause before the next word.
        if idx < len(data) - 1 and data[idx + 1]["start"] - wd["end"] > MAX_GAP:
            subtitles.append({
                "word": " ".join(w["word"] for w in line),
                "start": line[0]["start"],
                "end": wd["end"],
                "textcontents": line,
            })
            line = []
            line_chars = 0
            line_duration = 0

    if line:
        subtitles.append({
            "word": " ".join(w["word"] for w in line),
            "start": line[0]["start"],
            "end": line[-1]["end"],
            "textcontents": line,
        })
    return subtitles


def generate_video(
    prompt: str,
    max_duration: int,
    aspect_ratio: str,
    download_folder: str = "downloaded_videos",
    max_results: int = 6,
):
    # 1. Generate the narration script.
    script = generate_script(prompt, max_duration)

    # 2. Search for and download Pexels videos.
    videos = search_and_download_videos(
        prompt, max_duration, aspect_ratio, download_folder, max_results
    )
    if not videos:
        # Surface the failure in the UI instead of returning a string where
        # gr.Audio expects a file path.
        raise gr.Error("No videos were downloaded.")

    # 3. Load and concatenate the downloaded clips.
    video_clips = [VideoFileClip(path) for path in videos]
    final_video = aggregate_videos(video_clips)
    if final_video is None:
        raise gr.Error("Error generating video.")

    # 4. Generate the TTS narration and attach the audio track.
    narration_file = generate_narration(script)
    audio_len = AudioFileClip(narration_file).duration
    final_video = trim_video_to_audio_length(final_video, audio_len)
    final_video = add_narration_to_video(final_video, narration_file)

    # 5. Transcribe the narration for word-level timings (AssemblyAI reports
    # timestamps in milliseconds).
    transcript = aai.Transcriber().transcribe(narration_file)
    wordlevel_info = [
        {"word": w.text, "start": w.start / 1000.0, "end": w.end / 1000.0}
        for w in transcript.words
    ]

    # 6. Split the word timestamps into line-level subtitles.
    linelevel_subs = split_text_into_lines(wordlevel_info)

    # 7. Build subtitle clips (static lines plus word-by-word highlights).
    fw, fh = final_video.size
    # NOTE: MoviePy 2.x expects `font` to be a path to a font file; adjust
    # "Helvetica" to a .ttf/.otf path available on your system.
    font, fs, ypos = "Helvetica", 44, fh - 64
    all_clips = [final_video]

    for line in linelevel_subs:
        # Static full-line text, centered horizontally.
        txt = TextClip(
            text=line["word"],
            font=font,
            font_size=fs,
            color="white",
            stroke_color="black",
            stroke_width=1,
        )
        x0 = (fw - txt.w) / 2
        static = (
            txt
            .with_start(line["start"])
            .with_duration(line["end"] - line["start"])
            .with_position((x0, ypos))
        )
        all_clips.append(static)

        # Word-by-word highlight overlaid on the static line.
        cursor = x0
        for wd in line["textcontents"]:
            wc = TextClip(
                text=wd["word"],
                font=font,
                font_size=fs,
                color="yellow",
                stroke_color="black",
                stroke_width=1,
            )
            hl = (
                wc
                .with_start(wd["start"])
                .with_duration(wd["end"] - wd["start"])
                .with_position((cursor, ypos))
            )
            all_clips.append(hl)

            # Advance the cursor by the rendered width of the word plus a space.
            dummy = TextClip(text=wd["word"] + " ", font=font, font_size=fs, color="white")
            cursor += dummy.w

    # 8. Composite all clips and export.
    subtitled = CompositeVideoClip(all_clips, size=(fw, fh)).with_audio(final_video.audio)
    output_path = "final_with_subtitles.mp4"
    subtitled.write_videofile(
        output_path, fps=24, codec="libx264", audio_codec="aac", preset="ultrafast"
    )

    # Return the TTS audio path, the final video path, and the script.
    return narration_file, output_path, script
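# ---------------------------------------------------------------------------
# Illustrative usage (kept commented so it never runs on import): the full
# pipeline can be called directly, without the Gradio UI. The prompt and
# settings below are arbitrary examples; the call needs all three API keys
# and can take a while because it downloads and re-encodes video.
#
#   audio_path, video_path, script = generate_video(
#       "A timelapse of a city at night",
#       max_duration=15,
#       aspect_ratio="landscape",
#   )
#   print(script)
# ---------------------------------------------------------------------------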
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(
            label="Enter Text Prompt",
            placeholder="Enter the text to generate the video script.",
        ),
        gr.Slider(minimum=1, maximum=30, step=1, label="Video Length (seconds)", value=10),
        gr.Radio(
            choices=["portrait", "landscape", "square"],
            label="Select Aspect Ratio",
            value="landscape",
        ),
    ],
    outputs=[
        gr.Audio(label="Narration Audio"),
        gr.Video(label="Generated Video"),
        gr.Textbox(label="Generated Script", interactive=False),
    ],
    title="Sepia Text-to-Video Generator",
    description=(
        "Enter a text prompt, specify the length of the video (maximum 30 seconds), "
        "select the aspect ratio, and click 'Submit' to get the narrated audio, the "
        "video, and the script."
    ),
    live=False,
)

iface.launch(debug=True)
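# ---------------------------------------------------------------------------
# Illustrative usage (kept commented): once the app is running, it can also be
# driven programmatically with the gradio_client package. The URL and api_name
# below are assumptions based on Gradio's defaults; check the "Use via API"
# link in the running app for the exact endpoint.
#
#   from gradio_client import Client
#   api_client = Client("http://127.0.0.1:7860")
#   audio, video, script = api_client.predict(
#       "A timelapse of a city at night", 15, "landscape", api_name="/predict"
#   )
# ---------------------------------------------------------------------------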