Spaces:
Paused
Paused
| import os | |
| from enum import Enum | |
| from typing import Any, Dict, List, Optional | |
| from langchain.callbacks.manager import CallbackManagerForRetrieverRun | |
| from langchain.schema import Document | |
| from langchain.schema.retriever import BaseRetriever | |
class SearchAPIRetriever(BaseRetriever):
    """Retriever that exposes pre-fetched search results as Documents.

    The query is not used for filtering or ranking: every entry in
    ``pages`` is converted and returned in its original order.

    Attributes:
        pages: Search-result dicts. The keys read are ``"raw_content"``
            (used as the document text), ``"title"`` and ``"url"``
            (copied into the metadata as ``"title"`` and ``"source"``).
    """

    pages: List[Dict] = []

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Convert every stored page into a ``Document``.

        Args:
            query: Accepted to satisfy the retriever interface; not used.
            run_manager: Accepted to satisfy the retriever interface;
                not used.

        Returns:
            One ``Document`` per entry in ``self.pages``, in the same order.
        """
        docs = []
        for page in self.pages:
            # dict.get() only applies its default when the key is absent.
            # A page carrying an explicit ``"raw_content": None`` would
            # otherwise hand None to Document, whose page_content is
            # expected to be text, so normalise it to an empty string.
            raw_content = page.get("raw_content")
            docs.append(
                Document(
                    page_content="" if raw_content is None else raw_content,
                    metadata={
                        "title": page.get("title", ""),
                        "source": page.get("url", ""),
                    },
                )
            )
        return docs
class SectionRetriever(BaseRetriever):
    """Retriever used to fetch sections while avoiding redundant subtopics.

    The query is not used for filtering or ranking: every entry in
    ``sections`` is converted and returned in its original order.

    Attributes:
        sections: Section dicts. The keys read are ``"written_content"``
            (used as the document text) and ``"section_title"`` (copied
            into the metadata). Example::

                [
                    {
                        "section_title": "Example Title",
                        "written_content": "Example content"
                    },
                    ...
                ]
    """

    sections: List[Dict] = []

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Convert every stored section into a ``Document``.

        Args:
            query: Accepted to satisfy the retriever interface; not used.
            run_manager: Accepted to satisfy the retriever interface;
                not used.

        Returns:
            One ``Document`` per entry in ``self.sections``, in the same
            order.
        """
        docs = []
        for section in self.sections:
            # dict.get() only applies its default when the key is absent.
            # A section carrying an explicit ``"written_content": None``
            # would otherwise hand None to Document, whose page_content is
            # expected to be text, so normalise it to an empty string.
            written_content = section.get("written_content")
            docs.append(
                Document(
                    page_content=(
                        "" if written_content is None else written_content
                    ),
                    metadata={
                        "section_title": section.get("section_title", ""),
                    },
                )
            )
        return docs