mrme77's picture
Upload 6 files
256bf21 verified
raw
history blame contribute delete
822 Bytes
import PyPDF2
import os
def extract_pdf_content(pdf_file):
"""
Extracts text content from a PDF file.
Args:
pdf_file: The uploaded PDF file.
Returns:
str: Extracted text content from the PDF.
"""
if pdf_file is None:
return "No PDF file was uploaded. Please upload a PDF file."
try:
# Gradio file component returns the file path
pdf_reader = PyPDF2.PdfReader(pdf_file)
text = ""
for page in pdf_reader.pages:
text += page.extract_text()
if not text.strip():
return "No text content could be extracted from the PDF. The file might be scanned or contain only images."
return text
except Exception as e:
return f"Error extracting text from PDF: {str(e)}"