"""Podcast job runner.

Generates a podcast episode from the most popular paper, synthesizes the
audio, uploads it to a Hugging Face Space, and updates the RSS feed.
Also exposes ``submit_job`` to launch this script as a Hugging Face job.
"""
import argparse
import json
import os
import tempfile
from datetime import datetime

import numpy as np
import requests
import soundfile as sf
from huggingface_hub import HfApi

from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
from papers import PaperManager
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss

# Space that hosts both the job code and the published podcast files.
SPACE_ID = "fdaudens/podcast-jobs-rss-test"
# Sample rate (Hz) used when writing the synthesized audio to disk.
SAMPLE_RATE = 24000
# Upper bound (seconds) for the job-submission HTTP request; without it
# ``requests`` would wait indefinitely on an unresponsive server.
REQUEST_TIMEOUT_S = 60


def submit_job(
    inference_provider: str,
    hf_token: str,
    flavor: str = "cpu-basic",
) -> str:
    """Launch ``run_job.py`` as a job on the Hugging Face jobs API.

    Args:
        inference_provider: Value forwarded to the job as ``--provider``.
        hf_token: Hugging Face token; used to resolve the username, to
            authorize the request, and exposed to the job as ``HF_API_KEY``.
        flavor: Machine type requested for the job.

    Returns:
        The raw response body returned by the jobs endpoint. The HTTP status
        is not checked here; callers should inspect the returned text.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    username = HfApi(token=hf_token).whoami()["name"]

    url = f"https://huggingface.co/api/jobs/{username}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {hf_token}",
    }
    payload = {
        "spaceId": SPACE_ID,
        "command": ["python", "run_job.py"],
        "arguments": ["--provider", inference_provider],
        "environment": {"HF_API_KEY": hf_token},
        "flavor": flavor,
    }

    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(payload),
        timeout=REQUEST_TIMEOUT_S,
    )
    return response.text


def _synthesize_audio(podcast_script: str, speed: float = 1.0) -> list:
    """Turn a speaker-tagged script into a list of audio segments.

    Lines starting with ``[MIKE]`` use the male voice, lines starting with
    ``[JANE]`` use the female voice, and untagged lines fall back to the
    female voice. Blank lines are skipped.
    """
    # Load both voices once up front rather than once per line.
    female_pack = kpipeline.load_voice(FEMALE_VOICE)
    male_pack = kpipeline.load_voice(MALE_VOICE)
    speakers = {
        "[MIKE]": (MALE_VOICE, male_pack),
        "[JANE]": (FEMALE_VOICE, female_pack),
    }

    segments = []
    for raw_line in podcast_script.strip().splitlines():
        # Strip first so an indented "[MIKE] ..." line is still recognized
        # instead of having its tag read aloud by the default voice.
        line = raw_line.strip()
        if not line:
            continue

        voice, voice_pack, utterance = FEMALE_VOICE, female_pack, line
        for tag, (tag_voice, tag_pack) in speakers.items():
            if line.startswith(tag):
                voice, voice_pack = tag_voice, tag_pack
                utterance = line[len(tag):].strip()
                break

        for _, ps, _ in kpipeline(utterance, voice, speed):
            # The loaded voice is indexed by the length of ``ps``.
            # NOTE(review): presumably selects a length-matched style
            # reference for the model - confirm against the TTS library.
            ref_s = voice_pack[len(ps) - 1]
            segments.append(kmodel(ps, ref_s, speed).numpy())
    return segments


def _upload_podcast(temp_path: str, hf_token: str, base_name: str) -> str:
    """Upload the WAV at ``temp_path`` to the Space and return its URL."""
    api = HfApi(token=hf_token)

    today = datetime.now().strftime("%Y-%m-%d")
    podcast_filename = f"{base_name}-{today}.wav"
    # Path in the Space repository
    space_path = f"podcasts/{podcast_filename}"

    # Upload directly to the Space (crucial: repo_type="space")
    print(f"Uploading podcast to Space {SPACE_ID} at path {space_path}...")
    api.upload_file(
        path_or_fileobj=temp_path,
        path_in_repo=space_path,
        repo_id=SPACE_ID,
        repo_type="space",
        token=hf_token,
    )
    print(f"Podcast audio uploaded to Space at {space_path}")

    # NOTE(review): "/blob/main/" points at the Hub's file-viewer page. If
    # the RSS feed needs a direct download link, "/resolve/main/" is the
    # raw-file URL - confirm how update_rss consumes this value.
    return f"https://huggingface.co/spaces/{SPACE_ID}/blob/main/{space_path}"


def main():
    """Generate, synthesize, upload, and publish one podcast episode.

    Reads ``--provider``, ``--name`` and ``--flavor`` from the command line.
    Only ``--name`` affects the run (it is the uploaded file's base name);
    the other two are parsed and echoed but not otherwise used here.

    The Hugging Face token is read from ``HF_TOKEN`` or, failing that,
    ``HF_API_KEY``. Without a token nothing is uploaded.
    """
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--name", type=str, default="podcast")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()
    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

    # 1. Get the most popular paper's content
    paper_manager = PaperManager()
    top_papers = paper_manager.get_top_content()
    # Get the first (most popular) paper's id and text
    first_paper = list(top_papers.values())[0]
    subject = first_paper['content']
    paper_id = first_paper['id']

    # 2. Generate the podcast script
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio
    audio_segments = _synthesize_audio(podcast_script)
    if not audio_segments:
        print("No audio generated.")
        return

    full_audio = np.concatenate(audio_segments)

    # Reserve a temp path, then close the handle before writing so the
    # audio writer can reopen the file on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name

    try:
        sf.write(temp_path, full_audio, SAMPLE_RATE)

        # Get API token from environment
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
        if not hf_token:
            print("No Hugging Face token found in environment. Cannot upload to Space.")
            return

        audio_url = _upload_podcast(temp_path, hf_token, args.name)
        audio_length = os.path.getsize(temp_path)
    finally:
        # Always remove the temp file, including on the early return above
        # and when the upload raises.
        os.unlink(temp_path)

    print(f"Access URL: {audio_url}")

    # After uploading the podcast audio, publish the episode in the feed.
    update_rss(subject, audio_url, audio_length, paper_id=paper_id)


if __name__ == "__main__":
    main()