podcast-jobs-rss-test / update_rss.py
fdaudens's picture
fdaudens HF Staff
link in description
55e52b0
import xml.etree.ElementTree as ET
from datetime import datetime
import os
from huggingface_hub import InferenceClient
# Module-wide inference client; generate_headline_and_description() sends its
# chat-completion request through it. The access token is read from the
# HF_TOKEN environment variable (None when the variable is unset).
client = InferenceClient(
    model="Qwen/Qwen3-32B",
    provider="hf-inference",
    token=os.environ.get("HF_TOKEN"),
)
def _strip_reasoning(text: str) -> str:
    """Return *text* with any ``<think>...</think>`` reasoning spans removed."""
    while "<think>" in text:
        before, _, rest = text.partition("<think>")
        _, closing_tag, after = rest.partition("</think>")
        # No closing tag means generation stopped mid-reasoning (for example
        # the token limit was reached), so nothing after the opening tag is
        # part of the answer.
        text = before + (after if closing_tag else "")
    # Some chat templates open the reasoning span in the prompt, leaving only
    # the closing tag in the completion; keep what follows the last one.
    if "</think>" in text:
        text = text.rpartition("</think>")[2]
    return text.strip()


def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
    """Ask the LLM for a headline and a short description for the podcast episode.

    Args:
        subject: Paper text or topic. Only the first 10,000 characters are
            placed in the prompt.
        steering_question: Accepted for interface compatibility; it is not
            used when building the prompt.

    Returns:
        A ``(headline, description)`` tuple. When the reply has at least two
        non-empty lines, the first is the headline and the rest are joined
        with spaces into the description. Otherwise the headline is the first
        80 characters of the reply and the description is the whole reply.
        Both are empty strings when the model returns no usable text.
    """
    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
1. A catchy, informative headline for a podcast episode about it (max 15 words).
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
Here is the topic:
{subject[:10000]}
"""
    messages = [
        {"role": "system", "content": "You are a world-class podcast producer."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat_completion(
        messages,
        max_tokens=512,
    )
    # ``content`` may be None; treat that the same as an empty reply instead
    # of failing on ``None.strip()``.
    raw_text = response.choices[0].message.content or ""
    # The model can wrap its chain of thought in <think> tags; without this
    # the headline would be the literal "<think>" line.
    full_text = _strip_reasoning(raw_text)

    # Try to split headline and description
    lines = [line.strip() for line in full_text.splitlines() if line.strip()]
    if len(lines) >= 2:
        headline = lines[0]
        description = " ".join(lines[1:])
    else:
        headline = full_text[:80]
        description = full_text
    return headline, description
# -----------------------------------------------------------------------------
# UPDATE RSS
# -----------------------------------------------------------------------------
def get_next_episode_number(podcast_dir="podcasts"):
    """Return the number to assign to the next podcast episode.

    The number is the count of entries in *podcast_dir* whose name ends with
    ``.wav``, plus one.

    Args:
        podcast_dir: Directory holding the episode audio files.

    Returns:
        The next episode number as an int; ``1`` when the directory is empty,
        contains no ``.wav`` entries, or does not exist yet.
    """
    try:
        entries = os.listdir(podcast_dir)
    except FileNotFoundError:
        # No directory yet means no episode has been produced so far.
        return 1
    return sum(name.endswith(".wav") for name in entries) + 1
def update_rss(subject, audio_url, audio_length, paper_id=None, rss_path="rss.xml"):
    """Add a new episode ``<item>`` at the top of the RSS feed at *rss_path*.

    The title and description come from generate_headline_and_description().
    The feed file is parsed, its ``lastBuildDate`` (if present) is refreshed,
    the new item is inserted before the first existing ``<item>`` (or appended
    when there is none), and the file is rewritten in place.

    Args:
        subject: Paper text or topic used to generate the title/description.
        audio_url: URL of the episode audio; also used as the item's ``guid``.
        audio_length: Value written (as a string) to the enclosure ``length``.
        paper_id: Optional paper identifier; when truthy, a link to
            ``https://huggingface.co/papers/<paper_id>`` is appended to the
            description.
        rss_path: Path of the RSS XML file to update.

    Raises:
        ValueError: If the feed's root element has no ``<channel>`` child.
    """
    # Function-scope imports: the file header only imports ``datetime`` itself.
    from datetime import timezone
    from email.utils import format_datetime

    itunes_default_uri = "http://www.itunes.com/dtds/podcast-1.0.dtd"

    # Generate headline and description automatically
    title, description = generate_headline_and_description(subject)
    if paper_id:
        paper_url = f"https://huggingface.co/papers/{paper_id}"
        description += f"\n\n[Read the paper on Hugging Face]({paper_url})"

    # ElementTree forgets the prefixes a document was written with and would
    # serialize namespaced elements as ns0:, ns1:, ... Collect the declared
    # prefixes first so they can be registered and survive the round trip.
    declared = {}
    for _, (prefix, uri) in ET.iterparse(rss_path, events=("start-ns",)):
        declared.setdefault(prefix, uri)
    itunes_uri = declared.setdefault("itunes", itunes_default_uri)
    for prefix, uri in declared.items():
        if not prefix:
            # Registering a default namespace globally could strip the
            # namespace from unrelated elements on write; leave it alone.
            continue
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # Prefixes of the form ns<digits> are reserved by ElementTree and
            # cannot be registered; it will generate its own for that URI.
            pass

    tree = ET.parse(rss_path)
    root = tree.getroot()
    channel = root.find("channel")
    if channel is None:
        raise ValueError(f"{rss_path} has no <channel> element")

    # RFC 2822 date in UTC. format_datetime always emits English day/month
    # names, unlike strftime("%a ... %b"), which follows the process locale.
    now_rfc2822 = format_datetime(datetime.now(timezone.utc))

    # Update lastBuildDate
    last_build_date = channel.find("lastBuildDate")
    if last_build_date is not None:
        last_build_date.text = now_rfc2822

    # Create new item
    item = ET.Element("item")
    ET.SubElement(item, "title").text = title
    ET.SubElement(item, "description").text = description
    ET.SubElement(item, "pubDate").text = now_rfc2822
    # NOTE(review): the MIME type is fixed to audio/mpeg regardless of the
    # file behind audio_url — confirm it matches the uploaded audio format.
    ET.SubElement(item, "enclosure", url=audio_url, length=str(audio_length), type="audio/mpeg")
    ET.SubElement(item, "guid").text = audio_url
    # Use the namespace-qualified tag so the serializer emits a bound
    # ``itunes:`` prefix; a literal "itunes:explicit" tag is written with an
    # undeclared prefix and makes the file unparseable on the next run.
    ET.SubElement(item, f"{{{itunes_uri}}}explicit").text = "false"

    # Insert before the first existing <item> so the newest episode comes
    # first; append when the channel has no items yet.
    items = channel.findall("item")
    if items:
        channel.insert(list(channel).index(items[0]), item)
    else:
        channel.append(item)

    # Write back to file
    tree.write(rss_path, encoding="utf-8", xml_declaration=True)