victormiller
committed on
Commit
•
5171d34
1
Parent(s):
8a16e84
Update curated.py
Browse files- curated.py +22 -0
curated.py
CHANGED
@@ -571,6 +571,28 @@ phil_examples = Div(
|
|
571 |
),
|
572 |
)
|
573 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
574 |
arx_examples = Div(
|
575 |
Div(
|
576 |
get_arx_data(target=gen_random_id()),
|
|
|
571 |
),
|
572 |
)
|
573 |
|
574 |
+
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
    """Build the raw-vs-extracted sample view for one Arxiv document.

    Args:
        data_source: Name of the dataset to load samples for. Only the exact
            value ``"Arxiv"`` loads sample files from disk; any other value
            falls back to ten empty placeholder records.
        doc_id: Index of the sample document. It is coerced with ``int()``
            and clamped into the range 0..9 before use.
        target: Passed through unchanged to ``view_data``.

    Returns:
        Whatever ``view_data`` returns for the selected raw and extracted
        records.

    Raises:
        IndexError: If the loaded sample list has no entry at the clamped
            ``doc_id``.
    """
    # Clamp so out-of-range ids select the first or last of ten samples.
    doc_id = max(0, min(int(doc_id), 9))

    if data_source == "Arxiv":
        # Context managers close the sample files as soon as they are parsed;
        # previously the handles were left for the garbage collector.
        with open("data/curated_samples/arxiv_raw.json") as raw_file:
            raw_sample_doc = json.load(raw_file)
        with open("data/curated_samples/arxiv_extract.json") as extract_file:
            extracted_sample_doc = json.load(extract_file)
    else:
        # Both names intentionally refer to the same placeholder list.
        raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]

    raw_json = raw_sample_doc[doc_id]
    extracted_json = extracted_sample_doc[doc_id]
    # NOTE(review): the source label handed to view_data is the literal
    # "Arxiv" regardless of the data_source argument -- confirm this is intended.
    return view_data(
        raw_json,
        extracted_json,
        doc_id=doc_id,
        data_source="Arxiv",
        data_sources="Arxiv",
        target=target,
    )
|
595 |
+
|
596 |
arx_examples = Div(
|
597 |
Div(
|
598 |
get_arx_data(target=gen_random_id()),
|