|
import streamlit as st |
|
|
|
|
|
# Configure the browser tab title, favicon and layout. Streamlit documents
# set_page_config as a command that has to run before other Streamlit
# commands, which is why it sits above the remaining imports.
st.set_page_config(
    page_title="Paper Classification Service",
    # NOTE(review): the icon arrived as the remains of a mis-decoded emoji
    # ("π"); the books emoji is a best guess at the intended one -- confirm.
    page_icon="📚",
    layout="wide",
)
|
|
|
import PyPDF2 |
|
import io |
|
from model import PaperClassifier |
|
|
|
|
|
@st.cache_resource
def load_classifier(model_type):
    """Build the ``PaperClassifier`` for the given model key.

    The function is wrapped in ``st.cache_resource``, so the constructed
    classifier is kept by Streamlit and handed back on later calls with the
    same ``model_type`` instead of being rebuilt on every script rerun.

    Args:
        model_type: Model identifier forwarded unchanged to
            ``PaperClassifier``.

    Returns:
        The ``PaperClassifier`` instance created for ``model_type``.
    """
    classifier_instance = PaperClassifier(model_type)
    return classifier_instance
|
|
|
|
|
@st.cache_data
def extract_pdf_text(pdf_bytes):
    """Extract a ``(title, abstract)`` pair from the raw bytes of a PDF.

    The text of all pages is concatenated. The first non-blank line is used
    as the title and everything after it as the abstract. This is a purely
    positional heuristic: no attempt is made to detect an actual "Abstract"
    heading.

    Args:
        pdf_bytes: Raw contents of a PDF file.

    Returns:
        A ``(title, abstract)`` tuple of whitespace-stripped strings. Both
        are empty strings when the PDF yields no text at all; the abstract
        alone is empty when the text consists of a single line.

    Raises:
        Whatever ``PyPDF2.PdfReader`` or ``extract_text`` raise for input
        they cannot parse; nothing is caught here.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))

    # `or ""` guards against a page yielding None/empty instead of a string,
    # which would otherwise break the concatenation.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    lines = "\n".join(page_texts).split("\n")

    # Skip leading blank lines: extracted text often starts with empty lines,
    # and treating the first raw line as the title would then report an empty
    # title for a document that does contain text.
    for index, line in enumerate(lines):
        candidate = line.strip()
        if candidate:
            abstract = "\n".join(lines[index + 1:]).strip()
            return candidate, abstract

    return "", ""
|
|
|
|
|
# Model identifiers offered in the sidebar, in the registry's own order.
available_models = list(PaperClassifier.AVAILABLE_MODELS)

st.sidebar.title("Model Settings")

# With no registered models the selectbox has nothing to select and the
# registry lookup below would fail with a KeyError, so stop with a readable
# message instead.
if not available_models:
    st.sidebar.error("No classification models are available.")
    st.stop()

selected_model = st.sidebar.selectbox(
    "Select Model",
    available_models,
    index=0,
    help="Choose the model to use for classification",
)

# Show the registry metadata of the chosen model. The blank lines inside the
# markdown keep name and description in separate paragraphs; adjacent lines
# would be merged into a single paragraph by the markdown renderer.
model_info = PaperClassifier.AVAILABLE_MODELS[selected_model]
st.sidebar.markdown(f"""
### Selected Model

**Name:** {model_info['name']}

**Description:** {model_info['description']}
""")

# Classifier used by the input sections further down the page.
classifier = load_classifier(selected_model)
|
|
|
|
|
# Page heading and a short description of the two input options.
# NOTE(review): the heading's leading emoji arrived mis-decoded ("π"); the
# books emoji is a best guess at the intended one -- confirm.
st.title("📚 Academic Paper Classification")
st.markdown("""
This service helps you classify academic papers into different categories.
You can either:
- Enter the paper's title and abstract separately
- Upload a PDF file
""")
|
|
|
|
|
def _render_classification(result):
    """Render one classification result into the active Streamlit container.

    Args:
        result: Mapping returned by ``classifier.classify_paper``. The keys
            read here are ``input_type``, ``model_used`` and
            ``top_categories``; each entry of ``top_categories`` is read for
            ``category``, ``arxiv_category`` and ``probability``.
    """
    st.success("Classification Complete!")
    st.write(f"**Input Type:** {result['input_type'].replace('_', ' ').title()}")
    st.write(f"**Model Used:** {result['model_used']}")

    st.subheader("Top Categories (95% Confidence)")
    total_prob = 0.0
    for cat_info in result['top_categories']:
        # Coerce to a builtin float and clamp the bar value to [0, 1]: the
        # progress widget rejects out-of-range values, and the probability
        # may arrive as a non-builtin numeric type or carry rounding noise.
        # NOTE(review): assumes probabilities are fractions, as the
        # percent formatting below implies -- confirm against the model.
        prob = float(cat_info['probability'])
        total_prob += prob
        st.progress(
            min(max(prob, 0.0), 1.0),
            text=f"{cat_info['category']} ({cat_info['arxiv_category']}): {prob:.1%}",
        )

    st.info(f"Total probability of shown categories: {total_prob:.1%}")


# Two side-by-side input modes: manual text entry and PDF upload.
col1, col2 = st.columns(2)

with col1:
    st.subheader("Option 1: Manual Input")

    title_input = st.text_input(
        "Paper Title:",
        placeholder="Enter the paper title...",
    )
    abstract_input = st.text_area(
        "Paper Abstract (optional):",
        height=200,
        placeholder="Enter the paper abstract (optional)...",
    )

    if st.button("Classify Paper"):
        if title_input.strip():
            with st.spinner("Classifying..."):
                # A blank abstract is passed as None so the classifier can
                # tell "title only" apart from "title and abstract".
                result = classifier.classify_paper(
                    title=title_input,
                    abstract=abstract_input if abstract_input.strip() else None,
                )
                _render_classification(result)
        else:
            st.warning("Please enter at least the paper title.")

with col2:
    st.subheader("Option 2: PDF Upload")
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

    # The button is only offered once a file has been uploaded.
    if uploaded_file is not None and st.button("Classify PDF"):
        try:
            with st.spinner("Processing PDF..."):
                # getvalue() returns the whole buffer regardless of the
                # current read position, unlike read().
                title, abstract = extract_pdf_text(uploaded_file.getvalue())

                if not title:
                    # Report and fall through rather than calling st.stop():
                    # stopping the script here would also suppress the
                    # sidebar model list and the footer rendered below.
                    st.error("Could not extract title from PDF.")
                else:
                    with st.expander("Show extracted text"):
                        st.write("**Extracted Title:**")
                        st.write(title)
                        if abstract:
                            st.write("**Extracted Abstract:**")
                            st.write(abstract)

                    result = classifier.classify_paper(
                        title=title,
                        abstract=abstract if abstract else None,
                    )
                    _render_classification(result)
        except Exception as exc:
            # UI boundary: surface any parsing or classification failure to
            # the user instead of crashing the page.
            st.error(f"Error processing PDF: {exc}")
|
|
|
|
|
# Static overview of the supported model families, shown below the settings.
st.sidebar.markdown("---")
st.sidebar.title("Available Models")

_model_blurbs = (
    ("DistilBERT", "Fast and lightweight"),
    ("DeBERTa v3", "Advanced performance"),
    ("T5", "Versatile text-to-text"),
    ("RoBERTa", "Strong performance"),
    ("SciBERT", "Specialized for science"),
)
_model_bullets = "\n".join(
    f"- **{label}**: {blurb}" for label, blurb in _model_blurbs
)
st.sidebar.markdown(f"\n{_model_bullets}\n")
|
|
|
|
|
# Page footer. The heart emoji had been mis-decoded into mojibake and is
# restored here.
st.markdown("---")
st.markdown("Made with ❤️ using Streamlit and Transformers")