"""Streamlit front end for the academic paper classification service.

The page offers two ways to classify a paper: typing the title (and
optionally the abstract) by hand, or uploading a PDF from which the title
and abstract are extracted heuristically.
"""

import io

import PyPDF2
import streamlit as st

# Set up the Streamlit page - this must be the first st command
st.set_page_config(
    page_title="Paper Classification Service",
    page_icon="📚",
    layout="wide"
)

# Kept after set_page_config on purpose, matching the original ordering, so
# nothing executed while importing the model module can precede it.
from model import PaperClassifier  # noqa: E402


# Initialize the classifier with model selection
@st.cache_resource
def load_classifier(model_type):
    """Return a PaperClassifier for ``model_type``, cached across reruns."""
    return PaperClassifier(model_type)


# Cache the PDF text extraction
@st.cache_data
def extract_pdf_text(pdf_bytes):
    """Extract text from a PDF and split it into a title and an abstract.

    The heuristic is deliberately simple: the first non-blank line of the
    extracted text is taken as the title and everything after it as the
    abstract.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A ``(title, abstract)`` tuple of stripped strings. Both are empty
        when no text could be extracted.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))

    # Guard with `or ""` in case a page yields no text object at all
    # (e.g. image-only pages); join once instead of repeated `+=`.
    text = "\n".join((page.extract_text() or "") for page in pdf_reader.pages)

    lines = text.split('\n')

    # Skip leading blank lines so a PDF whose text starts with whitespace
    # does not produce an empty title.
    title_index = next(
        (idx for idx, line in enumerate(lines) if line.strip()),
        None,
    )
    if title_index is None:
        return "", ""

    title = lines[title_index]
    abstract = "\n".join(lines[title_index + 1:])
    return title.strip(), abstract.strip()


def _render_classification(result):
    """Render a classification result dict returned by ``classify_paper``.

    Reads the ``input_type``, ``model_used`` and ``top_categories`` keys;
    each entry of ``top_categories`` provides ``category``,
    ``arxiv_category`` and ``probability``.
    """
    st.success("Classification Complete!")
    st.write(f"**Input Type:** {result['input_type'].replace('_', ' ').title()}")
    st.write(f"**Model Used:** {result['model_used']}")

    # Show top categories
    st.subheader("Top Categories (95% Confidence)")
    total_prob = 0.0
    for cat_info in result['top_categories']:
        # Cast to a builtin float and clamp for the bar only: st.progress
        # expects a float in [0.0, 1.0], while the model may hand back a
        # non-builtin numeric type or a value nudged out of range by rounding.
        prob = float(cat_info['probability'])
        total_prob += prob
        st.progress(
            min(max(prob, 0.0), 1.0),
            text=f"{cat_info['category']} ({cat_info['arxiv_category']}): {prob:.1%}"
        )
    st.info(f"Total probability of shown categories: {total_prob:.1%}")


# Get available models for selection
available_models = list(PaperClassifier.AVAILABLE_MODELS.keys())

# Add model selection to sidebar
st.sidebar.title("Model Settings")
selected_model = st.sidebar.selectbox(
    "Select Model",
    available_models,
    index=0,
    help="Choose the model to use for classification"
)

# Display model information
model_info = PaperClassifier.AVAILABLE_MODELS[selected_model]
st.sidebar.markdown(f"""
### Selected Model
**Name:** {model_info['name']}

**Description:** {model_info['description']}
""")

# Initialize the classifier with selected model
classifier = load_classifier(selected_model)

# Title and description
st.title("📚 Academic Paper Classification")
st.markdown("""
This service helps you classify academic papers into different categories.
You can either:
- Enter the paper's title and abstract separately
- Upload a PDF file
""")

# Create two columns for input methods
col1, col2 = st.columns(2)

with col1:
    st.subheader("Option 1: Manual Input")

    # Title input
    title_input = st.text_input(
        "Paper Title:",
        placeholder="Enter the paper title..."
    )

    # Abstract input
    abstract_input = st.text_area(
        "Paper Abstract (optional):",
        height=200,
        placeholder="Enter the paper abstract (optional)..."
    )

    if st.button("Classify Paper"):
        if title_input.strip():
            with st.spinner("Classifying..."):
                result = classifier.classify_paper(
                    title=title_input,
                    abstract=abstract_input if abstract_input.strip() else None
                )
            _render_classification(result)
        else:
            st.warning("Please enter at least the paper title.")

with col2:
    st.subheader("Option 2: PDF Upload")
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

    if uploaded_file is not None:
        if st.button("Classify PDF"):
            # Top-level UI boundary: surface any extraction/classification
            # failure to the user instead of crashing the page.
            try:
                with st.spinner("Processing PDF..."):
                    # Extract title and abstract from PDF. getvalue() returns
                    # the full buffer regardless of the current stream
                    # position, unlike read().
                    title, abstract = extract_pdf_text(uploaded_file.getvalue())

                    if not title:
                        st.error("Could not extract title from PDF.")
                        st.stop()

                    # Show extracted text
                    with st.expander("Show extracted text"):
                        st.write("**Extracted Title:**")
                        st.write(title)
                        if abstract:
                            st.write("**Extracted Abstract:**")
                            st.write(abstract)

                    # Classify the paper
                    result = classifier.classify_paper(
                        title=title,
                        abstract=abstract if abstract else None
                    )

                _render_classification(result)

            except Exception as e:
                st.error(f"Error processing PDF: {str(e)}")

# Add information about the models
st.sidebar.markdown("---")
st.sidebar.title("Available Models")
st.sidebar.markdown("""
- **DistilBERT**: Fast and lightweight
- **DeBERTa v3**: Advanced performance
- **T5**: Versatile text-to-text
- **RoBERTa**: Strong performance
- **SciBERT**: Specialized for science
""")

# Add footer
st.markdown("---")
st.markdown("Made with ❤️ using Streamlit and Transformers")