Refactor data extraction functions: rename extract_pmc_data to extract_sd_data, add extract_phys_data, and update file handling for new data sources
a4b6d0b
| from arvix import extract_arxiv_data | |
| from pmc import extract_sd_data | |
| from phys import extract_phys_data | |
| import json | |
| import dotenv | |
| import os | |
| from concurrent.futures import ThreadPoolExecutor | |
# Call dotenv's loader first so the environment lookup below runs after it.
dotenv.load_dotenv()

# Key that fetch_data() compares the caller-supplied key against.
# This is None when the ACCESS_KEY environment variable is not set.
ACCESS_KEY = os.environ.get("ACCESS_KEY")
def fetch_arxiv_data():
    """Call ``extract_arxiv_data`` and parse its return value as JSON.

    Returns:
        The Python object decoded from the extractor's JSON text.
    """
    raw_payload = extract_arxiv_data()
    return json.loads(raw_payload)
def fetch_sd_data():
    """Call ``extract_sd_data`` and parse its return value as JSON.

    Returns:
        The Python object decoded from the extractor's JSON text.
    """
    raw_payload = extract_sd_data()
    return json.loads(raw_payload)
def fetch_phys_data():
    """Call ``extract_phys_data`` and parse its return value as JSON.

    Returns:
        The Python object decoded from the extractor's JSON text.
    """
    raw_payload = extract_phys_data()
    return json.loads(raw_payload)
def _merge_topic_ids(merged, source_data):
    """Copy the ``ids`` of every non-empty topic in *source_data* into *merged*.

    Topics whose ``count`` is 0 are skipped. When a topic is already present
    in *merged* (it was reported by an earlier source) and both id collections
    are lists, the new ids are appended instead of replacing the earlier ones.
    Any other combination falls back to plain replacement.
    """
    for topic, topic_data in source_data.items():
        if topic_data['count'] == 0:
            continue
        new_ids = topic_data['ids']
        existing = merged.get(topic)
        if (
            existing is not None
            and isinstance(existing['ids'], list)
            and isinstance(new_ids, list)
        ):
            # Build a new list so the source's own list is never mutated.
            existing['ids'] = existing['ids'] + new_ids
        else:
            merged[topic] = {'ids': new_ids}


def fetch_data(user_access_key):
    """Collect topic ids from all three sources and return them as JSON text.

    Args:
        user_access_key: Key supplied by the caller; it must equal the
            module-level ``ACCESS_KEY`` for any data to be fetched.

    Returns:
        A JSON string (indent 4, non-ASCII characters kept as-is).
        * Key mismatch: ``{"status": "Invalid access key"}``.
        * Otherwise: ``{"status": ..., "data": {topic: {"ids": ...}}}`` where
          ``status`` is ``'success'``, or ``'error'`` if any fetch or merge
          step raised. On error ``data`` holds whatever was merged before the
          failure.

    Each source is assumed to return a mapping of topic -> dict with
    ``count`` and ``ids`` keys; a payload of another shape raises inside the
    ``try`` and is reported as ``'error'``.
    """
    # NOTE(review): this is a plain equality check. If ACCESS_KEY is None
    # (environment variable unset), a caller passing None is accepted.
    if user_access_key != ACCESS_KEY:
        return json.dumps(
            {"status": "Invalid access key"}, indent=4, ensure_ascii=False
        )

    papers_data = {'status': 'success', 'data': {}}
    try:
        with ThreadPoolExecutor() as executor:
            # Submit all three fetches up front so they run on the pool
            # concurrently, then collect the results in submission order.
            futures = [
                executor.submit(fetch)
                for fetch in (fetch_sd_data, fetch_arxiv_data, fetch_phys_data)
            ]
            results = [future.result() for future in futures]
        for source_data in results:
            _merge_topic_ids(papers_data['data'], source_data)
    except Exception as e:
        # Boundary handler: report the failure in the payload rather than
        # propagating it to the caller.
        print(str(e))
        papers_data['status'] = 'error'

    return json.dumps(papers_data, indent=4, ensure_ascii=False)
if __name__ == '__main__':
    # Script entry point: fetch with the configured key and save the result.
    data = fetch_data(ACCESS_KEY)
    # fetch_data() serialises with ensure_ascii=False, so the text may contain
    # non-ASCII characters. Pin UTF-8 rather than relying on the platform's
    # locale default, which can fail or corrupt such characters.
    with open('data.json', 'w', encoding='utf-8') as f:
        f.write(data)