# utils/file_readers.py
"""Plain-text extraction helpers for ``.txt``, ``.docx`` and ``.pdf`` files."""

import os

# The third-party parsers are each needed by only one reader. Importing them
# defensively keeps the plain-text path usable when one of them is missing;
# the affected reader raises ImportError when it is actually called.
try:
    import docx
except ImportError:
    docx = None

try:
    import PyPDF2
except ImportError:
    PyPDF2 = None


def read_txt(file_path):
    """Return the entire contents of a text file decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents as a single string.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()


def read_docx(file_path):
    """Return the text of every paragraph in a .docx file.

    Only ``Document.paragraphs`` is read; paragraphs are joined with a
    newline character.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        The paragraph texts joined by ``'\\n'``.

    Raises:
        ImportError: If the ``docx`` module could not be imported.
    """
    if docx is None:
        raise ImportError("python-docx is required to read .docx files")
    doc = docx.Document(file_path)
    return '\n'.join(para.text for para in doc.paragraphs)


def read_pdf(file_path):
    """Return the extracted text of every page in a PDF, concatenated.

    Pages for which ``extract_text()`` returns a falsy value (``None`` or an
    empty string) are skipped. No separator is inserted between pages.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The concatenated page texts.

    Raises:
        ImportError: If the ``PyPDF2`` module could not be imported.
    """
    if PyPDF2 is None:
        raise ImportError("PyPDF2 is required to read .pdf files")
    with open(file_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        # Extraction must finish while the file handle is still open, so the
        # join (which drives the generator) stays inside the ``with`` block.
        page_texts = (page.extract_text() for page in reader.pages)
        return ''.join(text for text in page_texts if text)


def read_file(file_path):
    """Read a file with the reader that matches its extension.

    The extension check ignores case, so ``REPORT.PDF`` is handled the same
    way as ``report.pdf``. ``os.PathLike`` objects are accepted as well as
    plain strings.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text, or an empty string when the extension is not
        one of ``.txt``, ``.docx`` or ``.pdf``.
    """
    # Normalise only the copy used for matching; the readers receive the
    # caller's original path object untouched.
    lowered = os.fspath(file_path).lower()
    if lowered.endswith('.txt'):
        return read_txt(file_path)
    if lowered.endswith('.docx'):
        return read_docx(file_path)
    if lowered.endswith('.pdf'):
        return read_pdf(file_path)
    return ""