"""Streamlit app: upload a PDF, summarize its text, and play the summary as audio."""

import io

import PyPDF2
import streamlit as st
from gtts import gTTS
from transformers import pipeline

# Default location of the generated MP3 (relative to the working directory).
AUDIO_OUTPUT_PATH = "discussion_points.mp3"


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Args:
        pdf_file: A binary file-like object exposing ``read()`` (for example
            the object returned by ``st.file_uploader``).

    Returns:
        The text of all pages joined together. Pages for which
        ``extract_text()`` returns ``None`` or ``""`` contribute nothing, so
        the result may be an empty string.
    """
    pdf_stream = io.BytesIO(pdf_file.read())
    pdf_reader = PyPDF2.PdfReader(pdf_stream)
    # ``extract_text()`` may return None for non-text pages; treat it as "".
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def generate_discussion_points(text, max_length=600, min_length=300):
    """Summarize *text* with the default Hugging Face summarization pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound passed to the summarizer for the summary length.
        min_length: Lower bound passed to the summarizer for the summary length.

    Returns:
        The ``summary_text`` of the first (only) result, or ``""`` when *text*
        is empty or whitespace-only (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""
    summarizer = pipeline('summarization')
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Ask the tokenizer to truncate over-long input instead of passing
        # a sequence longer than the model accepts.
        truncation=True,
    )
    return summary[0]['summary_text']


def text_to_speech(text, output_path=AUDIO_OUTPUT_PATH):
    """Convert *text* to English speech and save it as an MP3 file.

    Args:
        text: The text to speak.
        output_path: Destination file; defaults to ``discussion_points.mp3``.

    Returns:
        The path the MP3 was written to.
    """
    tts = gTTS(text=text, lang='en')
    tts.save(output_path)
    return output_path


# Streamlit app
st.title("PDF Analysis and Discussion Generator")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Extract text from the uploaded PDF
    text = extract_text_from_pdf(uploaded_file)
    st.subheader("Extracted Text")
    st.write(text)

    if not text.strip():
        # Nothing to summarize or speak (e.g. a scanned, image-only PDF).
        st.warning("No extractable text was found in this PDF.")
    else:
        # Generate and display discussion points
        st.subheader("Generated Discussion Points")
        discussion_points = generate_discussion_points(text)
        st.write(discussion_points)

        # Convert discussion points to audio and play it
        audio_path = text_to_speech(discussion_points)
        # Context manager guarantees the file handle is closed after reading.
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes, format="audio/mp3")