Spaces:
Sleeping
Sleeping
| from fasthtml.common import * | |
| import json | |
# Names of the curated datasets, in the order they are listed as options
# in the "Data source" drop-down built by get_data().
data_sources = [
    "Freelaw", "Wikipedia", "PhilPapers", "Arxiv",
    "S2ORC", "S2ORC Abstract", "Pubmed", "USPTO",
    "Hackernews", "Ubuntu IRC", "StackExchange", "DM Maths",
    "PG19", "Europarl",
]
# Maps a data source name to its (raw, extracted) sample files. Sources whose
# two entries are the same path have a single file shown in both columns.
_CURATED_SAMPLE_FILES = {
    "Freelaw": (
        "data/curated_samples/freelaw_raw.json",
        "data/curated_samples/freelaw_extract.json",
    ),
    "Wikipedia": (
        "data/curated_samples/wiki.json",
        "data/curated_samples/wiki.json",
    ),
    "StackExchange": (
        "data/curated_samples/stackexchange_raw.json",
        "data/curated_samples/stackexchange_extract.json",
    ),
    "PhilPapers": (
        "data/curated_samples/philpapers_raw.json",
        "data/curated_samples/philpapers_raw.json",
    ),
    "Arxiv": (
        "data/curated_samples/arxiv_raw.json",
        "data/curated_samples/arxiv_extract.json",
    ),
    "S2ORC": (
        "data/curated_samples/s2orc_raw.json",
        "data/curated_samples/s2orc_raw.json",
    ),
    "S2ORC Abstract": (
        "data/curated_samples/s2orc_abstract_raw.json",
        "data/curated_samples/s2orc_abstract_raw.json",
    ),
    "Pubmed": (
        "data/curated_samples/pubmed_raw.json",
        "data/curated_samples/pubmed_extract.json",
    ),
    "DM Maths": (
        "data/curated_samples/dm_maths_raw.json",
        "data/curated_samples/dm_maths_extract.json",
    ),
    "PG19": (
        "data/curated_samples/pg19_raw.json",
        "data/curated_samples/pg19_raw.json",
    ),
    "Europarl": (
        "data/curated_samples/europarl_raw.json",
        "data/curated_samples/europarl_raw.json",
    ),
}


def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _json_column(title, payload, side):
    """Build one half-width column: a heading plus *payload* pretty-printed as JSON."""
    return Div(
        H3(title),
        Pre(
            json.dumps(payload, indent=4),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style=f"width: 48%; float: {side}; overflow-x: auto;",
    )


def get_data(data_source: str = "Freelaw", doc_id: int = 3):
    """Render the curated-sample viewer for one document of one data source.

    Args:
        data_source: Name of the dataset to show. Names without an entry in
            ``_CURATED_SAMPLE_FILES`` are rendered with empty placeholder
            documents instead of raising.
        doc_id: Index of the sample to show; clamped to the range 0-9.

    Returns:
        A ``Div`` with id ``colcontent`` holding the source drop-down, the
        sample slider, and the raw and extracted JSON side by side.

    Raises:
        OSError: If a sample file for the selected source cannot be opened.
        IndexError: If a sample file holds fewer entries than ``doc_id + 1``.
    """
    doc_id = max(0, min(int(doc_id), 9))

    sample_files = _CURATED_SAMPLE_FILES.get(data_source)
    if sample_files is None:
        # No curated files for this source: show empty documents.
        raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
    else:
        raw_path, extracted_path = sample_files
        raw_sample_doc = _load_json(raw_path)
        # Avoid reading the same file twice when both views share it.
        extracted_sample_doc = (
            raw_sample_doc
            if extracted_path == raw_path
            else _load_json(extracted_path)
        )

    raw_json = raw_sample_doc[doc_id]
    extracted_json = extracted_sample_doc[doc_id]

    # Both controls re-request /curated on change and swap the result into
    # the #colcontent container returned below.
    htmx_attrs = dict(
        hx_get="/curated",
        hx_target="#colcontent",
        hx_trigger="change",
        hx_swap="innerHTML",
    )
    drop_down = Select(
        *[Option(ds, value=ds, selected=(ds == data_source)) for ds in data_sources],
        name="data_source",
        **htmx_attrs,
    )
    slider = Input(
        type="range",
        name="doc_id",
        min="0",
        max="9",
        value=str(doc_id),
        **htmx_attrs,
        # Send the currently selected source along with the slider value.
        hx_include="[name='data_source']",
    )
    form = Form(
        Div(
            Label("Data source: ", drop_down),
        ),
        Div(
            Label("Data sample: ", slider, f"{doc_id}", cls="plotly_slider"),
        ),
        cls="plotly_input_container",
    )

    data_display = Div(
        _json_column("Raw format", raw_json, "left"),
        _json_column("Extracted format", extracted_json, "right"),
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id="colcontent")