Spaces:
Sleeping
Sleeping
Refactor summarization and email sending logic; improve error handling and environment variable checks
f50b29d
| import json | |
| import os | |
| import time | |
| import dotenv | |
| import html | |
| from summarize_paper import summarize_paper | |
| from fetch_data import fetch_paper_data_with_category | |
| from post_blog import post_blog | |
| from send_mail import send_email | |
# Let python-dotenv populate the process environment first, so the lookup
# below can see values that come from a local .env file.
dotenv.load_dotenv()

# Shared key that callers must present; None when the variable is not set.
ACCESS_KEY = os.environ.get("ACCESS_KEY")
def fix_text(text: str) -> str:
    """Escape *text* for HTML and repair mis-decoded words.

    The text is HTML-escaped and split on whitespace.  Each word is then
    re-encoded as Latin-1 and decoded as UTF-8, which undoes UTF-8 bytes
    that were read as Latin-1 (for example ``"cafÃ©"`` becomes ``"café"``).
    Words that cannot make that round trip are kept unchanged.

    Args:
        text: The string to clean up.

    Returns:
        Every word followed by a single space, so runs of whitespace collapse
        to one space and a non-empty result ends with a trailing space.
        Empty or whitespace-only input yields ``""``.
    """
    # NOTE(review): "â¦" looks like a mis-decoded ellipsis ("…"); confirm the
    # literal still matches the sequence that actually appears in the data.
    escaped = html.escape(text.replace("â¦", "..., "))

    repaired_words = []
    for word in escaped.split():
        try:
            repaired_words.append(word.encode("latin1").decode("utf-8"))
        except UnicodeError:
            # Not representable in Latin-1, or the bytes are not valid
            # UTF-8: the word was not mis-decoded, so keep it as it is.
            repaired_words.append(word)

    # Each word carries its own trailing space, matching the historical
    # output format that callers strip themselves.
    return "".join(f"{word} " for word in repaired_words)
def _summarize_with_retries(pdf_url, paper_id, max_retries, retry_delay):
    """Call ``summarize_paper`` until it returns both a summary and a mindmap.

    Makes at most ``max_retries`` calls, sleeping ``retry_delay`` seconds
    between them.  Returns the ``(summary, mindmap)`` pair from the last call
    that returned normally, or ``(None, None)`` if none did; the caller checks
    whether both values are usable.
    """
    summary, mindmap = None, None
    for attempt in range(1, max_retries + 1):
        try:
            summary, mindmap = summarize_paper(pdf_url, paper_id, ACCESS_KEY)
        except Exception as e:
            # Best effort: any failure of the summarizer is logged and retried.
            print(f"Error summarizing paper {paper_id}: {e}")
        if summary and mindmap:
            break
        # No point in waiting after the final attempt.
        if attempt < max_retries:
            print(f"Retrying paper {paper_id} in {retry_delay / 60:g} minutes")
            time.sleep(retry_delay)
    return summary, mindmap


def paper_data(
    paper_data_json: str,
    wait_time: int = 5,
    *,
    max_retries: int = 3,
    retry_delay: float = 180,
) -> str:
    """Summarize and post every paper described in *paper_data_json*.

    Args:
        paper_data_json: JSON object mapping each category name to an object
            that maps paper IDs to detail objects.  The ``doi``, ``pdf_url``,
            ``title`` and ``citation`` entries of each detail object are read.
        wait_time: Forwarded unchanged to ``post_blog``.
        max_retries: Maximum number of ``summarize_paper`` calls per paper.
        retry_delay: Seconds to sleep between summarization attempts.

    Returns:
        A JSON string with ``"status": "success"`` and a ``"data"`` object
        keyed by category and then by paper ID.  Papers that lack a required
        detail, cannot be summarized, or raise while being posted are logged
        and left out; the status field does not reflect such omissions.

    Raises:
        json.JSONDecodeError: If *paper_data_json* is not valid JSON.
    """
    result_data = {"status": "success", "data": {}}
    papers_by_category = json.loads(paper_data_json)

    for category, papers in papers_by_category.items():
        print(f"Processing category: {category}")
        # The category is recorded even if none of its papers make it through.
        category_results = result_data["data"][category] = {}

        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            title = details.get("title")
            title = html.escape(title) if title else ""
            citation = details.get("citation")
            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue

            summary, mindmap = _summarize_with_retries(
                pdf_url, paper_id, max_retries, retry_delay
            )
            if not summary or not mindmap:
                print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
                continue

            try:
                # NOTE(review): the title was already escaped above and
                # fix_text escapes again, so the posted title is HTML-escaped
                # three times and the citation twice, while summary and
                # mindmap are escaped once.  Confirm post_blog expects this
                # before changing it.
                fixed_title = html.escape(fix_text(title).strip())
                fixed_citation = html.escape(fix_text(citation).strip())
                fixed_summary = html.escape(str(summary).strip())
                fixed_mindmap = html.escape(str(mindmap).strip())
                post_status = post_blog(
                    doi,
                    fixed_title,
                    category,
                    fixed_summary,
                    fixed_mindmap,
                    fixed_citation,
                    ACCESS_KEY,
                    wait_time,
                )
            except Exception as e:
                # One paper failing to post must not abort the whole batch.
                print(f"Error posting blog '{title}': {e}")
                continue

            category_results[paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": fixed_title,
                "category": category,
                "posted": post_status,
                "citation": fixed_citation,
                "summary": fixed_summary,
                "mindmap": fixed_mindmap,
            }

    return json.dumps(result_data, indent=4, ensure_ascii=False)
def post_blogpost(uaccess_key: str, wait_time: int = 5) -> str:
    """Fetch the paper list, summarize and post it, then e-mail the results.

    Args:
        uaccess_key: Key supplied by the caller; it must equal the configured
            ``ACCESS_KEY``.
        wait_time: Forwarded unchanged to ``paper_data``.

    Returns:
        The JSON string produced by ``paper_data``.  When the key is rejected
        the function returns ``False`` instead (the ``-> str`` annotation is
        kept as it was for compatibility).  A failure to send the e-mail is
        logged and does not affect the return value.
    """
    # Fail closed: with no key configured, ACCESS_KEY is None and a caller
    # passing None would otherwise compare equal and be let through.
    if not ACCESS_KEY or uaccess_key != ACCESS_KEY:
        return False

    data = fetch_paper_data_with_category(uaccess_key)
    processed_data = paper_data(data, wait_time)
    try:
        send_email(processed_data)
    except Exception as e:
        # Mail is best effort; the processed data is still returned.
        print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
    else:
        print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
    finally:
        print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
    return processed_data
def test(uaccess_key: str) -> str:
    """Run ``paper_data`` on a single hard-coded sample paper.

    Args:
        uaccess_key: Key supplied by the caller; it must equal the configured
            ``ACCESS_KEY``.

    Returns:
        The JSON string produced by ``paper_data`` for the sample, or
        ``False`` when the key is rejected (the ``-> str`` annotation is kept
        as it was for compatibility).
    """
    # Fail closed: with no key configured, ACCESS_KEY is None and a caller
    # passing None would otherwise compare equal and be let through.
    if not ACCESS_KEY or uaccess_key != ACCESS_KEY:
        return False

    # Fixture only: one category holding one paper entry.
    sample_papers = {
        "Economics": {
            "2501.00578": {
                "paper_id": "2501.00578",
                "doi": "https://doi.org/10.1002/alz.14328",
                "title": "Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
                "category": "Economics",
                "pdf_url": "https://arxiv.org/pdf/2501.00578",
                "citation": "Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
            }
        }
    }
    return paper_data(json.dumps(sample_papers, ensure_ascii=False, indent=4))
if __name__ == '__main__':
    # Script entry point: run the built-in sample with the configured key
    # and show whatever comes back.
    print(test(ACCESS_KEY))