File size: 1,300 Bytes
c10dac6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, UnstructuredHTMLLoader, BSHTMLLoader
from langchain.document_loaders.pdf import PyPDFDirectoryLoader

class DocumentLoader:
    """
    A class to handle loading of documents from a directory on disk.

    The loader instance created by the most recent call to
    :meth:`load_documents` is kept on ``self.loader``.
    """

    def __init__(self, data_path: str) -> None:
        """
        Initializes the document loader with a given data path.

        :param data_path: Path to the data directory.
        """
        self.data_path = data_path
        # Populated by load_documents(); stays None until the first load attempt.
        self.loader = None

    def load_documents(self) -> list:
        """
        Loads documents from the specified data path.

        The path is handed to ``PyPDFDirectoryLoader``, so it is assumed to be
        a directory containing PDF files.

        :return: List of loaded documents (never empty).
        :raises FileNotFoundError: If ``data_path`` does not exist.
        :raises RuntimeError: If the loader fails, or if it returns no
            documents. The underlying exception is attached as ``__cause__``.
        """
        if not os.path.exists(self.data_path):
            raise FileNotFoundError(f"The specified path '{self.data_path}' does not exist.")

        # Keep the try body down to the calls that can actually fail inside
        # the loader, and chain the original exception so its traceback is
        # preserved for whoever debugs the RuntimeError.
        try:
            # Assuming PDF files in a directory
            self.loader = PyPDFDirectoryLoader(self.data_path)
            documents = self.loader.load()
        except Exception as e:
            raise RuntimeError(f"Error loading documents: {e}") from e

        if not documents:
            # An empty result has always reached callers as a RuntimeError
            # carrying this message; keep that contract, but make the
            # ValueError an explicit cause instead of an accidental catch.
            cause = ValueError("No documents found in the specified path.")
            raise RuntimeError(f"Error loading documents: {cause}") from cause

        return documents