"""Load PDF, DOCX, and plain-text documents as lists of paragraph strings."""

import os
from typing import List

# The third-party parsers are only needed for their own formats, so a missing
# package must not prevent the module (and plain-text processing) from loading.
try:
    from PyPDF2 import PdfReader
except ImportError:
    PdfReader = None

try:
    from docx import Document
except ImportError:
    Document = None


def process_pdf(file_path: str) -> List[str]:
    """Extract the text of a PDF and split it into paragraphs.

    The text of every page is followed by a newline, the pages are
    concatenated, and the result is split on blank lines (``"\\n\\n"``).

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The paragraph strings, in document order. Empty strings are kept.

    Raises:
        ImportError: If PyPDF2 is not installed.
    """
    if PdfReader is None:
        raise ImportError("PyPDF2 is required to process PDF files")

    reader = PdfReader(file_path)
    # Defensive: treat a None extraction result as empty text so that a
    # single unreadable page does not abort the whole document.
    text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)
    # Split the text into paragraphs.
    return text.split("\n\n")


def process_docx(file_path: str) -> List[str]:
    """Return the non-blank paragraphs of a DOCX file.

    Args:
        file_path: Path of the DOCX file to read.

    Returns:
        The text of every paragraph that is not empty or whitespace-only,
        in document order.

    Raises:
        ImportError: If python-docx is not installed.
    """
    if Document is None:
        raise ImportError("python-docx is required to process DOCX files")

    doc = Document(file_path)
    return [p.text for p in doc.paragraphs if p.text.strip()]


def process_txt(file_path: str) -> List[str]:
    """Read a UTF-8 text file and split it into paragraphs on blank lines.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The paragraph strings, in file order. Empty strings are kept, so an
        empty file yields ``[""]``.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    return text.split("\n\n")


def process_documents(file_path: str) -> List[str]:
    """Split a document into paragraphs, choosing the parser by extension.

    The extension is compared case-insensitively. ``.pdf``, ``.docx`` and
    ``.txt`` are supported.

    Args:
        file_path: Path of the document to read.

    Returns:
        The paragraphs produced by the matching ``process_*`` function, or an
        empty list when the extension is not supported.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        return process_pdf(file_path)
    if ext == ".docx":
        return process_docx(file_path)
    if ext == ".txt":
        return process_txt(file_path)
    return []