victormiller committed on
Commit
5171d34
1 Parent(s): 8a16e84

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +22 -0
curated.py CHANGED
@@ -571,6 +571,28 @@ phil_examples = Div(
571
  ),
572
  )
573
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
  arx_examples = Div(
575
  Div(
576
  get_arx_data(target=gen_random_id()),
 
571
  ),
572
  )
573
 
574
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
    """Build the raw-vs-extracted view for one Arxiv sample document.

    Args:
        data_source: Selects where the samples come from. Only the exact
            value "Arxiv" loads the JSON sample files; any other value
            falls back to ten empty placeholder documents.
        doc_id: Index of the sample to show. It is coerced with ``int()``
            and clamped to the range 0..9.
        target: Passed through unchanged to ``view_data``.

    Returns:
        Whatever ``view_data`` returns for the selected pair of documents.

    Raises:
        OSError: If a sample file cannot be opened.
        json.JSONDecodeError: If a sample file is not valid JSON.
        IndexError: If a loaded sample list is shorter than ``doc_id + 1``.
    """
    # Clamp instead of raising so an out-of-range id still selects a sample.
    doc_id = max(0, min(int(doc_id), 9))

    if data_source == "Arxiv":
        # Context managers close the handles promptly; the previous bare
        # open() calls left them to be closed by the garbage collector.
        with open("data/curated_samples/arxiv_raw.json") as raw_file:
            raw_sample_doc = json.load(raw_file)
        with open("data/curated_samples/arxiv_extract.json") as extract_file:
            extracted_sample_doc = json.load(extract_file)
    else:
        # Placeholder: both names refer to the same list of ten empty dicts.
        raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]

    raw_json = raw_sample_doc[doc_id]
    extracted_json = extracted_sample_doc[doc_id]
    # NOTE(review): the labels passed below are always "Arxiv", even when
    # data_source is something else — kept as in the original; confirm intent.
    return view_data(
        raw_json,
        extracted_json,
        doc_id=doc_id,
        data_source="Arxiv",
        data_sources="Arxiv",
        target=target,
    )
595
+
596
  arx_examples = Div(
597
  Div(
598
  get_arx_data(target=gen_random_id()),