Spaces:
Sleeping
Sleeping
Commit
·
28f08c2
1
Parent(s):
7117f63
Update app.py (#2)
Browse files
- Update app.py (a3839305306296d4cdeed96580213b96ac2fc19e)
Co-authored-by: Harm de Vries <[email protected]>
app.py
CHANGED
|
@@ -17,6 +17,7 @@ for index, row in df.iterrows():
|
|
| 17 |
all_languages = list(tags.keys())
|
| 18 |
|
| 19 |
|
|
|
|
| 20 |
@st.cache()
|
| 21 |
def load_data(language, ext):
|
| 22 |
ds = load_dataset(
|
|
@@ -27,61 +28,72 @@ def load_data(language, ext):
|
|
| 27 |
return ds
|
| 28 |
|
| 29 |
|
| 30 |
-
col1, col2, _ = st.
|
| 31 |
with col1:
|
| 32 |
-
chosen_language = st.selectbox(
|
| 33 |
label="Select a programming language", options=all_languages, index=0
|
| 34 |
)
|
| 35 |
with col2:
|
| 36 |
-
chosen_ext = st.selectbox(
|
| 37 |
label="Select an extension", options=tags[chosen_language], index=0
|
| 38 |
)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
# load the dataset and get indexes of non lexable files
|
| 42 |
samples = load_data(chosen_language, chosen_ext)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
max_docs = len(samples)
|
| 44 |
samples = samples.add_column("idx", range(len(samples)))
|
| 45 |
-
not_lexed = samples.filter(lambda x: not x["lexable"])
|
| 46 |
-
indexes_not_lexed = not_lexed["idx"]
|
| 47 |
-
|
| 48 |
|
| 49 |
# info about extension
|
| 50 |
-
st.sidebar.markdown("### Information about the extension:")
|
| 51 |
-
text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
|
| 52 |
-
{df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
|
| 53 |
-
are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
|
| 54 |
-
st.sidebar.markdown(text)
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
st.markdown("#### File content:")
|
| 82 |
-
if example["lexable"]:
|
| 83 |
-
st.code(example["content"], language=chosen_language)
|
| 84 |
-
else:
|
| 85 |
-
st.text(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
|
| 86 |
-
st.text(str(example['content']))
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
all_languages = list(tags.keys())
|
| 18 |
|
| 19 |
|
| 20 |
+
|
| 21 |
@st.cache()
|
| 22 |
def load_data(language, ext):
|
| 23 |
ds = load_dataset(
|
|
|
|
| 28 |
return ds
|
| 29 |
|
| 30 |
|
| 31 |
+
col1, col2, _ = st.columns([1, 1, 4])
|
| 32 |
with col1:
|
| 33 |
+
chosen_language = st.sidebar.selectbox(
|
| 34 |
label="Select a programming language", options=all_languages, index=0
|
| 35 |
)
|
| 36 |
with col2:
|
| 37 |
+
chosen_ext = st.sidebar.selectbox(
|
| 38 |
label="Select an extension", options=tags[chosen_language], index=0
|
| 39 |
)
|
| 40 |
|
| 41 |
+
st.sidebar.header("Filters")
|
| 42 |
+
not_lexable = st.sidebar.checkbox("Not lexable?")
|
| 43 |
+
low_alphanum = st.sidebar.checkbox("Low alphanum count?")
|
| 44 |
+
long_lines = st.sidebar.checkbox("Long lines?")
|
| 45 |
+
|
| 46 |
|
| 47 |
# load the dataset and get indexes of non lexable files
|
| 48 |
samples = load_data(chosen_language, chosen_ext)
|
| 49 |
+
|
| 50 |
+
if not_lexable:
|
| 51 |
+
samples = samples.filter(lambda x: not x["lexable"])
|
| 52 |
+
if low_alphanum:
|
| 53 |
+
samples = samples.filter(lambda x: x["low_alphanum"])
|
| 54 |
+
if long_lines:
|
| 55 |
+
samples = samples.filter(lambda x: x["long_lines"])
|
| 56 |
+
|
| 57 |
max_docs = len(samples)
|
| 58 |
samples = samples.add_column("idx", range(len(samples)))
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
# info about extension
|
| 61 |
+
# st.sidebar.markdown("### Information about the extension:")
|
| 62 |
+
# text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
|
| 63 |
+
# {df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
|
| 64 |
+
# are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
|
| 65 |
+
# st.sidebar.markdown(text)
|
| 66 |
+
|
| 67 |
+
if max_docs > 0:
|
| 68 |
+
col_1, _ = st.columns([3, 3])
|
| 69 |
+
with col_1:
|
| 70 |
+
index_example = st.number_input(
|
| 71 |
+
f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
|
| 72 |
+
min_value=0,
|
| 73 |
+
max_value=max_docs - 1,
|
| 74 |
+
value=0,
|
| 75 |
+
step=1,
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# info about the chosen example
|
| 80 |
+
example = samples[index_example]
|
| 81 |
+
|
| 82 |
+
# st.markdown("#### Information about the chosen example:")
|
| 83 |
+
# text_alpha = "**has**" if example["long_lines"] else "doesn't have"
|
| 84 |
+
# text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
|
| 85 |
+
# text_lexer = "is" if example["lexable"] else "**isn't**"
|
| 86 |
+
|
| 87 |
+
# st.markdown(
|
| 88 |
+
# f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
|
| 89 |
+
# {text_lines} very long lines, and {text_lexer} lexable."
|
| 90 |
+
# )
|
| 91 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
# display file content
|
| 94 |
+
st.markdown("#### File content:")
|
| 95 |
+
if not example["lexable"]:
|
| 96 |
+
st.write(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
|
| 97 |
+
st.text(example['content'])
|
| 98 |
+
else:
|
| 99 |
+
st.code(example["content"], language=chosen_language)
|