File size: 3,398 Bytes
e3f66f1
9fcc04d
 
e3f66f1
 
 
 
9fcc04d
 
 
 
e3f66f1
9fcc04d
e3f66f1
 
 
 
9fcc04d
e3f66f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fcc04d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3f66f1
 
 
9fcc04d
e3f66f1
 
 
 
 
9fcc04d
e3f66f1
9fcc04d
e3f66f1
 
 
9fcc04d
 
e3f66f1
 
 
 
 
 
 
 
9fcc04d
 
 
 
 
e3f66f1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
from openai import OpenAI
import tempfile
import time
import typing as tp
from pathlib import Path
from pydub import AudioSegment

from dotenv import load_dotenv
# Runs at import time, before the API client below is constructed.
# NOTE(review): presumably this loads settings from a local .env file into the
# process environment — confirm against the python-dotenv docs.
load_dotenv()

# Single module-level API client shared by every request.
# NOTE(review): built with no arguments, so it presumably picks up its API key
# from the environment populated above — confirm.
client = OpenAI()

class FileCleaner:
    """Track temporary files and delete them once they exceed a lifetime.

    Cleanup is lazy: every call to ``add`` first removes the tracked files
    that are older than ``file_lifetime`` seconds. Nothing is deleted while
    ``add`` is not being called.
    """

    def __init__(self, file_lifetime: float = 3600) -> None:
        """Create an empty cleaner.

        Args:
            file_lifetime: Age in seconds after which a tracked file is
                removed on the next cleanup pass.
        """
        self.file_lifetime = file_lifetime
        # (time_added, path) pairs in insertion order, i.e. oldest first.
        self.files: tp.List[tp.Tuple[float, Path]] = []

    def add(self, path: tp.Union[str, Path]) -> None:
        """Purge expired files, then start tracking ``path``.

        Args:
            path: File to delete once it is older than ``file_lifetime``.
        """
        self._cleanup()
        self.files.append((time.time(), Path(path)))

    def _cleanup(self) -> None:
        """Delete every expired file at the front of the tracking list.

        Entries are stored oldest first, so the scan stops at the first file
        that is still within its lifetime.
        """
        now = time.time()
        expired = 0
        try:
            for time_added, path in self.files:
                if now - time_added <= self.file_lifetime:
                    break
                try:
                    path.unlink()
                except FileNotFoundError:
                    # Already removed by someone else; the goal is achieved.
                    pass
                expired += 1
        finally:
            # Drop the handled entries in one step, even if an unexpected
            # OSError interrupted the scan part-way through.
            del self.files[:expired]


# Shared registry of temporary audio files, using FileCleaner's default lifetime.
file_cleaner = FileCleaner()
# Intro audio, loaded once at import time from "intro.mp3" (a path relative to
# the working directory) and prepended to every generated episode.
intro = AudioSegment.from_mp3("intro.mp3")

# Default instructions for the script-writing model; offered as the system
# prompt in the UI examples.
DEFAULT_SYSTEM_PROMPT = """
You are a podcast editor who specializes in creating a script out of a webpage.

[TASKS]
- You will receive a link to a webpage about some content.
- You will create a script out of the content.
- The script should be 1 minute long if you read it out loud.
- Start with an intro to pique the interest of the listener.
- Then, summarize the content in a way that is easy to understand. Ask questions about the content and answer them.
- Conclude with the most intriguing part of the content.
- Refrain from adding section headers in the script like [INTRODUCTION], [CONTENT], [CONCLUSION].
- The script should be inspiring, written colloquially, with English words and proverbs understood around the world.
- Write in a relaxed tone.
- Use filler words like 'um', 'ah', 'well' etc. to make it sound more natural.

[BREAKS]
- Use the <break time="1s"/> tag to add a pause in the speech. Use it after each paragraph and after a sentence when appropriate to achieve a relaxed tone.
- Use <break time="3s"/> for longer pauses, to emphasize a point.
"""

def generate_episode(system_prompt, weblink):
    """Generate a short podcast episode (audio file and script) for a link.

    The link is sent as the user message of a chat completion; the reply is
    used as the script, converted to speech, and appended to the shared intro
    audio. The page itself is not fetched here: the model only receives the
    URL string.

    Args:
        system_prompt: Instructions for the script-writing model. A blank
            value falls back to ``DEFAULT_SYSTEM_PROMPT``.
        weblink: URL of the page the episode should be about.

    Returns:
        A ``(path, script)`` tuple: the path of the merged MP3 file and the
        script text that was narrated.

    Raises:
        gr.Error: If ``weblink`` is blank or the model returns no script.
    """
    # Fail fast instead of spending API calls on a request that cannot work.
    if not weblink or not weblink.strip():
        raise gr.Error("Please provide a weblink to turn into a podcast episode.")
    if not system_prompt or not system_prompt.strip():
        system_prompt = DEFAULT_SYSTEM_PROMPT

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": weblink},
        ],
    )
    script = completion.choices[0].message.content
    if not script:
        raise gr.Error("The model returned an empty script; please try again.")

    speech = client.audio.speech.create(
        model="tts-1",
        voice="fable",
        input=script,
    )

    # delete=False: the file has to outlive the handle so it can be re-read
    # by name below; file_cleaner removes it later.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as podcast_file:
        podcast_file.write(speech.content)
    file_cleaner.add(podcast_file.name)

    podcast = AudioSegment.from_mp3(podcast_file.name)
    merged_audio = intro + podcast

    # Reserve a unique output name, close the handle, then export by name.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as final_file:
        final_path = final_file.name
    merged_audio.export(final_path, format="mp3")
    # Track the merged file as well, otherwise every request leaks one MP3.
    file_cleaner.add(final_path)

    print("MP3 files merged successfully!")

    return final_path, script

# Gradio UI: system prompt and link in, merged audio plus the script text out.
demo = gr.Interface(
    fn=generate_episode,
    inputs=[
        # Pre-filled so that entering only a link still yields a podcast-style
        # script; the prompt remains fully editable.
        gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=8),
        gr.Textbox(label="Weblink"),
    ],
    outputs=[gr.Audio(label="Podcast Audio"), gr.Textbox(label="Podcast Script")],
    examples=[
        [DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Mount_Tambora"],
        [DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Great_Wall_of_China"],
        [DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Apollo_11"],
    ],
)


# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()