Spaces:
Running
on
Zero
Running
on
Zero
import xml.etree.ElementTree as ET | |
from datetime import datetime | |
import os | |
from huggingface_hub import InferenceClient | |
# Module-wide inference client used by the LLM helpers below. The access token
# is taken from the HF_TOKEN environment variable (None when it is not set).
client = InferenceClient("Qwen/Qwen3-32B", provider="hf-inference", token=os.environ.get("HF_TOKEN"))
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
    """Ask the LLM for a headline and a short description for the podcast episode.

    Args:
        subject: Paper text or topic. Only the first 10,000 characters are
            included in the prompt.
        steering_question: Accepted for interface compatibility; this function
            does not currently use it.

    Returns:
        A ``(headline, description)`` tuple. When the reply contains at least
        two non-empty lines, the first is the headline and the rest, joined
        with spaces, form the description. Otherwise the headline is the first
        80 characters of the reply and the description is the whole reply
        (both are empty strings when the model returns nothing).
    """
    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
1. A catchy, informative headline for a podcast episode about it (max 15 words).
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
Here is the topic:
{subject[:10000]}
"""
    messages = [
        {"role": "system", "content": "You are a world-class podcast producer."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat_completion(
        messages,
        max_tokens=512,
    )

    # The message content may be None (e.g. an empty completion); treat that
    # as an empty reply instead of crashing on ``None.strip()``.
    full_text = (response.choices[0].message.content or "").strip()

    # Qwen3 models may emit their reasoning inside <think>...</think> ahead of
    # the answer; keep only what follows the closing tag so the reasoning does
    # not end up as the episode headline.
    if "</think>" in full_text:
        full_text = full_text.rpartition("</think>")[2].strip()

    # Try to split headline and description
    lines = [line.strip() for line in full_text.splitlines() if line.strip()]
    if len(lines) >= 2:
        headline, *rest = lines
        return headline, " ".join(rest)

    # Single-line (or empty) reply: reuse it for both fields.
    return full_text[:80], full_text
# -----------------------------------------------------------------------------
# UPDATE RSS
# -----------------------------------------------------------------------------
def get_next_episode_number(podcast_dir: str = "podcasts") -> int:
    """Return the number to assign to the next podcast episode.

    The number is one more than the count of entries in *podcast_dir* whose
    name ends with ``.wav``.

    Args:
        podcast_dir: Directory holding the generated episode audio files.

    Returns:
        ``count + 1``; ``1`` when the directory does not exist yet, since no
        episode has been produced in that case.
    """
    try:
        entries = os.listdir(podcast_dir)
    except FileNotFoundError:
        # First run: nothing has been generated, so start numbering at 1.
        return 1
    return sum(1 for name in entries if name.endswith(".wav")) + 1
def update_rss(subject, audio_url, audio_length, paper_id=None, rss_path="rss.xml"):
    """Add a new episode ``<item>`` at the top of the RSS feed in *rss_path*.

    The episode title and description are generated from *subject* by
    ``generate_headline_and_description``. The feed file is rewritten in place.

    Args:
        subject: Paper text or topic used to generate the title/description.
        audio_url: URL of the episode audio; used for the enclosure and as the
            item's ``guid``.
        audio_length: Written (via ``str``) as the enclosure ``length``
            attribute -- presumably the file size in bytes, as RSS expects.
        paper_id: Optional Hugging Face paper id; when given, a link to the
            paper is appended to the description.
        rss_path: Path of the RSS XML file to update.

    Raises:
        ValueError: If the feed has no ``<channel>`` element.
    """
    # Function-scope imports: the module only imports ``datetime`` itself.
    from datetime import timezone
    from email.utils import format_datetime

    itunes_ns = "http://www.itunes.com/dtds/podcast-1.0.dtd"

    # Generate headline and description automatically
    title, description = generate_headline_and_description(subject)
    if paper_id:
        paper_url = f"https://huggingface.co/papers/{paper_id}"
        description += f"\n\n[Read the paper on Hugging Face]({paper_url})"

    # ElementTree discards the document's prefixes when parsing and would
    # serialize namespaced tags as ns0:, ns1:, ... on write. Re-register the
    # prefixes declared in the feed so they survive the round trip.
    for _event, (prefix, uri) in ET.iterparse(rss_path, events=("start-ns",)):
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # Prefixes of the form "ns<digits>" are reserved by ElementTree.
            continue
        if prefix == "itunes":
            itunes_ns = uri
    # Ensure the prefix is bound even if the feed did not declare it.
    ET.register_namespace("itunes", itunes_ns)

    tree = ET.parse(rss_path)
    channel = tree.getroot().find("channel")
    if channel is None:
        raise ValueError(f"No <channel> element found in {rss_path}")

    # RFC 2822 timestamp in UTC. format_datetime always uses English day and
    # month names, unlike strftime("%a"/"%b"), which follows the process locale.
    now_rfc2822 = format_datetime(datetime.now(timezone.utc))

    # Update lastBuildDate
    last_build_date = channel.find("lastBuildDate")
    if last_build_date is not None:
        last_build_date.text = now_rfc2822

    # Create new item
    item = ET.Element("item")
    ET.SubElement(item, "title").text = title
    ET.SubElement(item, "description").text = description
    ET.SubElement(item, "pubDate").text = now_rfc2822
    # NOTE(review): the MIME type is hard-coded to audio/mpeg, while
    # get_next_episode_number counts ".wav" files -- confirm the audio format.
    ET.SubElement(item, "enclosure", url=audio_url, length=str(audio_length), type="audio/mpeg")
    ET.SubElement(item, "guid").text = audio_url
    # Use the namespace-qualified tag so the serializer emits a properly bound
    # "itunes:" prefix instead of a literal tag with an undeclared prefix.
    ET.SubElement(item, f"{{{itunes_ns}}}explicit").text = "false"

    # Insert the new item before the first existing <item> (newest first),
    # or append it if the channel has no items yet.
    first_item = channel.find("item")
    if first_item is not None:
        channel.insert(list(channel).index(first_item), item)
    else:
        channel.append(item)

    # Write back to file
    tree.write(rss_path, encoding="utf-8", xml_declaration=True)