Spaces:

khalidsabri
/

emailresponding

Sleeping

emailresponding / utils /loaders.py

first changes

c10dac6 8 months ago

1.3 kB

	import os
	from langchain.document_loaders import PyPDFLoader, DirectoryLoader, UnstructuredHTMLLoader, BSHTMLLoader
	from langchain.document_loaders.pdf import PyPDFDirectoryLoader

	class DocumentLoader:
	    """
	    Loads PDF documents from a directory on disk.

	    The instance stores the path it was given and, after
	    ``load_documents`` has run, the loader object that was built for it.
	    """

	    def __init__(self, data_path):
	        """
	        Stores the data path; nothing is read from disk at this point.

	        :param data_path: Path to the data directory.
	        """
	        self.data_path = data_path
	        # Set by load_documents() once a loader has been constructed.
	        self.loader = None

	    def load_documents(self):
	        """
	        Loads the documents found under ``self.data_path``.

	        :return: List of loaded documents, as returned by the loader.
	        :raises FileNotFoundError: If ``self.data_path`` does not exist.
	        :raises RuntimeError: If building the loader or loading fails for
	            any reason, including when the loader returns no documents.
	            The original exception is attached as ``__cause__``.
	        """
	        if not os.path.exists(self.data_path):
	            raise FileNotFoundError(f"The specified path '{self.data_path}' does not exist.")

	        try:
	            # Assuming PDF files in a directory
	            self.loader = PyPDFDirectoryLoader(self.data_path)
	            documents = self.loader.load()

	            if not documents:
	                # Raised inside the try on purpose: the handler below turns
	                # it into the RuntimeError callers have always received.
	                raise ValueError("No documents found in the specified path.")

	            return documents
	        except Exception as e:
	            # Keep the historical RuntimeError type and message, but chain
	            # the cause explicitly so the underlying failure is not lost.
	            raise RuntimeError(f"Error loading documents: {e}") from e