import os
import tempfile

import pandas as pd
import streamlit as st
from huggify_data.scrape_modules import PDFQnAGenerator

# Markdown shown inside the sidebar "Instruction" expander.
# NOTE(review): the text reads as if it once had one item per line; it is kept
# byte-for-byte as found (single spaces, one newline before the hashtags).
# Restore the line breaks deliberately if the rendered layout matters.
_INSTRUCTION_TEXT = (
    " 🎉 Introducing Huggify-Data: Your Ultimate PDF Data Scraping and "
    "Uploading Tool! 🎉 "
    "👋 I'm thrilled to present the new user-friendly interface for my "
    "Python package, huggify-data. This powerful tool simplifies the "
    "process of scraping data from PDFs and generating question and answer "
    "pairs using OpenAI, making it perfect for building conversational "
    "chatbots. 🤖✨ "
    "🚀 Key Features: "
    "1. Easy PDF Data Extraction: Quickly scrape text content from PDFs and "
    "convert it into a structured data frame. "
    "2. Automated Question-Answer Pair Generation: Extract meaningful "
    "question-answer pairs from your PDF content, ideal for training "
    "chatbots. "
    "3. User-Friendly Interface: Interact with the package without any "
    "programming experience, making information accessibility easier and "
    "more efficient. "
    "🔧 How It Works: "
    "- API Key: Add your OpenAI API Key. "
    "- Load Your PDF: Easily load any PDF file into the library. "
    "- Just wait: Wait and download the `.csv` from the app. "
    "📈 Why Huggify-Data? "
    "Whether you're a data scientist, developer, or AI enthusiast, "
    "Huggify-Data streamlines the process of preparing your PDF data for AI "
    "applications. It's never been easier to transform your PDFs into "
    "valuable datasets for building conversational AI models. "
    "🔗 Links: "
    "- **GitHub Repository**: [https://lnkd.in/eJEJebcw](https://lnkd.in/eJEJebcw) "
    "- **Documentation**: [https://lnkd.in/eF9JFXAP](https://lnkd.in/eF9JFXAP) "
    "- **Notebook**: [https://lnkd.in/eaA2qaPt](https://lnkd.in/eaA2qaPt) "
    "- **App**: [https://huggingface.co/spaces/eagle0504/huggify-data](https://huggingface.co/spaces/eagle0504/huggify-data) "
    "Don't forget to like, comment, and subscribe for more updates and "
    "tutorials on AI and data science! \n"
    "👍🔔 #HuggifyData #PythonLibrary #AI #DataScience #HuggingFace "
    "#PDFScraping #Chatbot #OpenSource #Yiqiao "
)


def _generate_qa_dataframe(pdf_bytes: bytes, openai_api_key: str) -> pd.DataFrame:
    """Run the Huggify-Data pipeline on the given PDF bytes.

    PDFQnAGenerator is constructed from a file path, so the bytes are first
    written to a temporary ``.pdf`` file. That file is removed again before
    this function returns, whether the pipeline succeeds or raises.

    Args:
        pdf_bytes: Raw content of the uploaded PDF.
        openai_api_key: Key passed straight through to PDFQnAGenerator.

    Returns:
        The object produced by ``generator.convert_to_dataframe()``; the caller
        uses it as a pandas DataFrame.

    Raises:
        Any exception raised by PDFQnAGenerator propagates unchanged.
    """
    temp_pdf_path = None
    try:
        # delete=False so the file can be closed (flushing it to disk) and then
        # reopened by path; cleanup is handled in the finally clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(pdf_bytes)

        generator = PDFQnAGenerator(temp_pdf_path, openai_api_key)
        generator.process_scraped_content()
        generator.generate_questions_answers()
        return generator.convert_to_dataframe()
    finally:
        if temp_pdf_path is not None:
            try:
                os.unlink(temp_pdf_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not mask the
                # pipeline's own result or exception.
                pass


def main() -> None:
    """Render the Streamlit page: sidebar inputs, processing, and CSV download."""
    st.set_page_config(layout="wide")
    st.title("PDF Question-Answer Generator using Huggify-Data Package")

    # Expander in the sidebar for instruction
    with st.sidebar.expander("Instruction"):
        st.write(_INSTRUCTION_TEXT)

    # Sidebar inputs: the PDF to process and the OpenAI API key
    st.sidebar.title("Upload PDF")
    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type="pdf")
    openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")

    # Embed YouTube video in the sidebar
    st.sidebar.video("https://youtu.be/CfMcw4OTLCQ")

    # Nothing to do until both a file and a key have been provided.
    if uploaded_file is None or not openai_api_key:
        return

    # NOTE(review): Streamlit presumably re-runs this script on later widget
    # interactions, which would repeat the generation below — consider caching
    # the result (e.g. in st.session_state) if that cost matters.
    with st.spinner('Processing the PDF and generating questions and answers...'):
        # getvalue() returns the whole buffer regardless of the current stream
        # position, unlike read().
        df = _generate_qa_dataframe(uploaded_file.getvalue(), openai_api_key)

    # Display the resulting DataFrame
    st.subheader("Generated Question-Answer Pairs")
    st.write(df)

    # Option to download the DataFrame as a CSV
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download as CSV",
        data=csv,
        file_name='questions_answers.csv',
        mime='text/csv',
    )


if __name__ == "__main__":
    main()