Spaces:
Build error
Build error
import abc
import os
import pickle

import pandas as pd
class SimilarityAlg(metaclass=abc.ABCMeta):
    """Base class for algorithms that score a query embedding against embeddings.

    The base class holds no state and its ``__call__`` is a placeholder that
    returns ``None``; concrete algorithms are presumably expected to override
    it (the subclasses are not visible in this file).
    """

    def __init__(self) -> None:
        """Create the algorithm object; the base class has nothing to set up."""

    def __call__(self, query_embedding, embeddings) -> None:
        """Compare ``query_embedding`` with ``embeddings``.

        Args:
            query_embedding: Embedding of the query.
            embeddings: Embeddings the query is compared against.

        Returns:
            None in this base implementation, which performs no computation.
        """
        return None
class Embedding_Model(metaclass=abc.ABCMeta):
    """Base class for models that compute the embedding of a text.

    Constructing an instance loads the pickled embedding cache that belongs to
    ``model_name`` (or starts an empty one when no cache file exists yet) and
    writes it back to disk, so the cache file exists once ``__init__`` returns.
    """

    def __init__(self, model_name, *, cache_dir="/app/ckpt") -> None:
        """Initialize the embedding model and its on-disk cache.

        Args:
            model_name: Name of the model; it becomes part of the cache file
                name. It may contain ``/`` (the missing intermediate directory
                is then created).
            cache_dir: Directory that holds the cache file. Keyword-only;
                defaults to the previously hard-coded ``/app/ckpt``.

        Raises:
            OSError: If the cache directory cannot be created or the cache
                file cannot be written.
        """
        embedding_cache_path = f"{cache_dir}/embedding_cache_{model_name}.pkl"
        self.embedding_cache_path = embedding_cache_path

        # Load the cache if it exists; otherwise start from an empty one.
        # NOTE(review): unpickling can execute arbitrary code, so the cache
        # file must only ever come from a trusted location.
        try:
            embedding_cache = pd.read_pickle(embedding_cache_path)
        except FileNotFoundError:
            embedding_cache = {}

        # The target directory may not exist yet (fresh container, or a model
        # name with a path separator). Without this, the write below raised
        # FileNotFoundError and the constructor crashed.
        parent_dir = os.path.dirname(embedding_cache_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Save a copy to disk so the cache file exists from now on.
        with open(embedding_cache_path, "wb") as embedding_cache_file:
            pickle.dump(embedding_cache, embedding_cache_file)

        self.embedding_cache = embedding_cache
        self.model_name = model_name

    def __call__(self, text) -> None:
        """Compute the embedding of the text.

        The base implementation is a placeholder that does nothing and returns
        None.
        """
        pass
class AbstractPDFParser(metaclass=abc.ABCMeta):
    """Base class for parsers that parse a PDF file.

    Constructing an instance loads the pickled parser cache that belongs to
    ``db_name`` (or starts an empty one when no cache file exists yet) and
    writes it back to disk, so the cache file exists once ``__init__`` returns.
    All parsing methods of this base class are placeholders that return None.
    """

    def __init__(self, db_name, *, cache_dir="/app/ckpt") -> None:
        """Initialize the PDF database cache.

        Args:
            db_name: Name of the database; it becomes part of the cache file
                name. It may contain ``/`` (the missing intermediate directory
                is then created).
            cache_dir: Directory that holds the cache file. Keyword-only;
                defaults to the previously hard-coded ``/app/ckpt``.

        Raises:
            OSError: If the cache directory cannot be created or the cache
                file cannot be written.
        """
        db_cache_path = f"{cache_dir}/pdf_parser_{db_name}.pkl"
        self.db_cache_path = db_cache_path

        # Load the cache if it exists; otherwise start from an empty one.
        # NOTE(review): unpickling can execute arbitrary code, so the cache
        # file must only ever come from a trusted location.
        try:
            db_cache = pd.read_pickle(db_cache_path)
        except FileNotFoundError:
            db_cache = {}

        # The target directory may not exist yet (fresh container, or a
        # database name with a path separator). Without this, the write below
        # raised FileNotFoundError and the constructor crashed.
        parent_dir = os.path.dirname(db_cache_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Save a copy to disk so the cache file exists from now on.
        with open(db_cache_path, "wb") as cache_file:
            pickle.dump(db_cache, cache_file)

        self.db_cache = db_cache
        self.db_name = db_name

    def parse_pdf(self) -> None:
        """Parse the PDF file (placeholder in the base class)."""
        pass

    def _get_metadata(self) -> None:
        """Get the metadata of the PDF file (placeholder in the base class)."""
        pass

    def get_paragraphs(self) -> None:
        """Get the paragraphs of the PDF file (placeholder in the base class)."""
        pass

    def get_split_paragraphs(self) -> None:
        """Get the split paragraphs of the PDF file.

        Intended return value (per the original contract):
            split_paragraphs: dict of metadata and corresponding list of
            split paragraphs.

        The base implementation is a placeholder and returns None.
        """
        pass

    def _determine_metadata_of_paragraph(self, paragraph) -> None:
        """Determine the metadata of a paragraph.

        Intended return value (per the original contract):
            metadata: metadata of the paragraph.

        The base implementation is a placeholder and returns None.
        """
        pass

    # NOTE: the hook below is disabled in the original source and is kept
    # verbatim for reference.
    # @abc.abstractmethod
    # def _determine_optimal_split_of_pargraphs(self, ) -> None:
    #     """
    #     Determine the optimal split of paragraphs
    #     Return:
    #         split_paragraphs: dict of metadata and corresponding list of split paragraphs
    #     """
    #     pass
class ChatbotEngine(metaclass=abc.ABCMeta):
    """Base class for chatbot engines that answer a user query.

    Both methods are placeholders in this base class.
    """

    def __init__(self) -> None:
        """Create the engine; the base class has nothing to set up."""

    def query(self, user_query):
        """Handle ``user_query``.

        Args:
            user_query: The query submitted by the user.

        Returns:
            None in this base implementation, which performs no work.
        """
        return None