Spaces:

fdaudens
/

podcast-jobs-rss-test

Running on Zero

App Files Files Community

podcast-jobs-rss-test / update_rss.py

fdaudens HF Staff

link in description

55e52b0 17 days ago

raw

history blame contribute delete

3.24 kB

	import xml.etree.ElementTree as ET
	from datetime import datetime
	import os
	from huggingface_hub import InferenceClient

	# Module-level chat client shared by generate_headline_and_description.
	# It targets the "Qwen/Qwen3-32B" model through the "hf-inference" provider.
	# The access token is read from the HF_TOKEN environment variable; os.getenv
	# yields None when that variable is not set.
	client = InferenceClient(
	"Qwen/Qwen3-32B",
	provider="hf-inference",
	token=os.getenv("HF_TOKEN"),
	)

	def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
	    """Ask the LLM for a headline and a short description for the podcast episode.

	    Args:
	        subject: Paper text or topic to summarise. Only the first 10,000
	            characters are placed in the prompt.
	        steering_question: Accepted for interface compatibility; it is
	            currently not used when building the prompt.

	    Returns:
	        A ``(headline, description)`` tuple. When the reply has at least two
	        non-blank lines, the first one is the headline and the remaining ones
	        are joined with single spaces to form the description. Otherwise the
	        headline is the first 80 characters of the reply and the description
	        is the whole reply.
	    """
	    # The continuation lines of this literal are deliberately not indented:
	    # they are part of the prompt text sent to the model.
	    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
	1. A catchy, informative headline for a podcast episode about it (max 15 words).
	2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.

	Here is the topic:
	{subject[:10000]}
	"""
	    messages = [
	        {"role": "system", "content": "You are a world-class podcast producer."},
	        {"role": "user", "content": prompt},
	    ]
	    response = client.chat_completion(
	        messages,
	        max_tokens=512,
	    )
	    # Guard against a missing (None) message content so that .strip() cannot
	    # raise AttributeError; an empty reply yields ("", "").
	    content = response.choices[0].message.content
	    full_text = (content or "").strip()

	    # Try to split headline and description
	    lines = [line.strip() for line in full_text.splitlines() if line.strip()]
	    if len(lines) >= 2:
	        headline = lines[0]
	        description = " ".join(lines[1:])
	    else:
	        # Single-line (or empty) reply: reuse it for both fields, truncating
	        # the headline to 80 characters.
	        headline = full_text[:80]
	        description = full_text
	    return headline, description

	# -----------------------------------------------------------------------------
	# UPDATE RSS
	# -----------------------------------------------------------------------------
	def get_next_episode_number(podcast_dir="podcasts"):
	    """Return one more than the number of ``.wav`` entries in *podcast_dir*.

	    The suffix match is case-sensitive and only the directory's direct
	    entries (as listed by ``os.listdir``) are considered.
	    """
	    wav_count = sum(1 for entry in os.listdir(podcast_dir) if entry.endswith(".wav"))
	    return wav_count + 1

	def update_rss(subject, audio_url, audio_length, paper_id=None, rss_path="rss.xml"):
	    """Add a new episode ``<item>`` at the top of the RSS feed at *rss_path*.

	    The title and description are produced by
	    ``generate_headline_and_description(subject)``. The feed file is parsed,
	    its ``<lastBuildDate>`` (if present) is refreshed, the new item is placed
	    before the first existing ``<item>`` (or appended when there is none), and
	    the file is rewritten in place as UTF-8 with an XML declaration.

	    Args:
	        subject: Paper text or topic passed to the headline generator.
	        audio_url: URL of the episode audio; used for the enclosure and as the
	            item's ``<guid>``.
	        audio_length: Value written (via ``str()``) to the enclosure's
	            ``length`` attribute.
	        paper_id: Optional paper identifier. When truthy, a link to
	            ``https://huggingface.co/papers/<paper_id>`` is appended to the
	            description.
	        rss_path: Path of the RSS file to read and overwrite.

	    Raises:
	        ValueError: If the feed's root element has no ``<channel>`` child.

	    Note:
	        ``ET.register_namespace`` updates a process-wide registry, so the
	        prefixes registered here also affect later ElementTree serialisation
	        in the same process.
	    """
	    # Function-scope imports keep this block self-contained; the module only
	    # imports ``datetime`` from the datetime package.
	    from datetime import timezone
	    from email.utils import format_datetime

	    default_itunes_ns = "http://www.itunes.com/dtds/podcast-1.0.dtd"

	    # Generate headline and description automatically
	    title, description = generate_headline_and_description(subject)
	    if paper_id:
	        paper_url = f"https://huggingface.co/papers/{paper_id}"
	        description += f"\n\n[Read the paper on Hugging Face]({paper_url})"

	    # ElementTree does not keep the prefixes used in the input file: on write
	    # it regenerates xmlns declarations from its registry and falls back to
	    # ns0, ns1, ... for unknown URIs. Register the prefixes the feed declares
	    # so they survive the round trip.
	    declared = {}
	    for _event, (prefix, uri) in ET.iterparse(rss_path, events=("start-ns",)):
	        if prefix:  # leave a default (unprefixed) namespace untouched
	            declared[prefix] = uri
	    # Reuse the feed's own URI for the "itunes" prefix when it declares one.
	    itunes_ns = declared.setdefault("itunes", default_itunes_ns)
	    for prefix, uri in declared.items():
	        try:
	            ET.register_namespace(prefix, uri)
	        except ValueError:
	            # Prefixes of the form "ns<digits>" are reserved by ElementTree.
	            continue

	    tree = ET.parse(rss_path)
	    root = tree.getroot()
	    channel = root.find("channel")
	    if channel is None:
	        raise ValueError(f"{rss_path!r} has no <channel> element")

	    # Update lastBuildDate
	    # format_datetime on an aware UTC datetime gives e.g.
	    # "Mon, 01 Jan 2024 12:00:00 +0000" with English day/month names
	    # regardless of the process locale.
	    now_rfc2822 = format_datetime(datetime.now(timezone.utc))
	    last_build_date = channel.find("lastBuildDate")
	    if last_build_date is not None:
	        last_build_date.text = now_rfc2822

	    # Create new item
	    item = ET.Element("item")
	    ET.SubElement(item, "title").text = title
	    ET.SubElement(item, "description").text = description
	    ET.SubElement(item, "pubDate").text = now_rfc2822
	    ET.SubElement(item, "enclosure", url=audio_url, length=str(audio_length), type="audio/mpeg")
	    ET.SubElement(item, "guid").text = audio_url
	    # Use a namespace-qualified tag. A literal "itunes:explicit" tag would be
	    # written verbatim while the matching xmlns:itunes declaration is not
	    # guaranteed to be emitted, leaving an unbound prefix in the output.
	    ET.SubElement(item, f"{{{itunes_ns}}}explicit").text = "false"

	    # Insert the new item before the first existing <item>, or append it when
	    # the channel has no items yet.
	    first_item = channel.find("item")
	    if first_item is not None:
	        channel.insert(list(channel).index(first_item), item)
	    else:
	        channel.append(item)

	    # Write back to file
	    tree.write(rss_path, encoding="utf-8", xml_declaration=True)