|
import PyPDF2 |
|
import os |
|
|
|
def extract_pdf_content(pdf_file) -> str:
    """Extract the text content of every page in a PDF file.

    Problems are reported through the returned string rather than raised,
    so callers always receive a ``str``.

    Args:
        pdf_file: The uploaded PDF file; it is handed directly to
            ``PyPDF2.PdfReader``. ``None`` means nothing was uploaded.

    Returns:
        str: The text of all pages concatenated in page order, or a
        human-readable message when no file was given, when no text could
        be extracted, or when reading the PDF failed.
    """
    if pdf_file is None:
        return "No PDF file was uploaded. Please upload a PDF file."

    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Build the result with a single join instead of repeated ``+=``.
        # ``or ""`` is a defensive guard: should extraction yield None for
        # a page, that page is skipped instead of failing the whole file.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        # Deliberate catch-all: any parsing failure becomes a message.
        return f"Error extracting text from PDF: {e}"

    if not text.strip():
        return "No text content could be extracted from the PDF. The file might be scanned or contain only images."

    return text
|
|