Asaad Almutareb committed
Commit f66560f · 1 Parent(s): 8145c48

initial advanced rag chain
.devcontainer/Dockerfile ADDED
@@ -0,0 +1,15 @@
+ ARG VARIANT="3.10-bookworm"
+ FROM mcr.microsoft.com/devcontainers/python:1-${VARIANT}
+ #FROM langchain/langchain
+
+ # [Optional] Uncomment if you want to install an additional version of node using nvm
+ # ARG EXTRA_NODE_VERSION=10
+ # RUN su node -c "source /usr/local/share/nvm/nvm.sh && nvm install ${EXTRA_NODE_VERSION}"
+
+ # [Optional] Uncomment if you want to install more global node modules
+ # RUN su node -c "npm install -g <your-package-list-here>"
+
+ #COPY library-scripts/github-debian.sh /tmp/library-scripts/
+ RUN apt-get update && apt-get upgrade -y
+ RUN pip install --upgrade pip
+ #RUN pip install -r requirements.txt
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,32 @@
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
+ // README at: https://github.com/devcontainers/templates/tree/main/src/python
+ {
+   "name": "Python 3.10",
+   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
+   //"image": "mcr.microsoft.com/devcontainers/python:1-3.10-bookworm"
+
+
+   // build config for the docker image instead:
+   "build": { "dockerfile": "Dockerfile" },
+
+   // Features to add to the dev container. More info: https://containers.dev/features.
+   // "features": {},
+
+   // Use 'forwardPorts' to make a list of ports inside the container available locally.
+   // "forwardPorts": [],
+
+   // Use 'postCreateCommand' to run commands after the container is created.
+   // "postCreateCommand": "pip3 install --user -r requirements.txt",
+
+   // Configure tool-specific properties.
+   "customizations": {
+     // Configure properties specific to VS Code.
+     "vscode": {
+       // Add the IDs of extensions you want installed when the container is created.
+       "extensions": ["ms-azuretools.vscode-docker", "ms-python.python", "qwtel.sqlite-viewer"]
+     }
+   }//,
+
+   // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
+   // "remoteUser": "root"
+ }
.github/dependabot.yml ADDED
@@ -0,0 +1,12 @@
+ # To get started with Dependabot version updates, you'll need to specify which
+ # package ecosystems to update and where the package manifests are located.
+ # Please see the documentation for more information:
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+ # https://containers.dev/guide/dependabot
+
+ version: 2
+ updates:
+   - package-ecosystem: "devcontainers"
+     directory: "/"
+     schedule:
+       interval: weekly
.github/workflows/pylint.yml ADDED
@@ -0,0 +1,23 @@
+ name: Pylint
+
+ on: [push]
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+     strategy:
+       matrix:
+         python-version: ["3.8", "3.9", "3.10"]
+     steps:
+       - uses: actions/checkout@v3
+       - name: Set up Python ${{ matrix.python-version }}
+         uses: actions/setup-python@v3
+         with:
+           python-version: ${{ matrix.python-version }}
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install pylint
+       - name: Analysing the code with pylint
+         run: |
+           pylint $(git ls-files '*.py')
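The workflow's last step lints every tracked Python file via `git ls-files '*.py'`. A rough pure-Python stand-in for that file collection (a hypothetical helper, not part of this commit — it walks the tree instead of asking git, so it does not honor `.gitignore`) can be sketched as:

```python
import tempfile
from pathlib import Path

def python_files(root: str) -> list[str]:
    """Approximate `git ls-files '*.py'` by walking the tree.

    Unlike git, this ignores .gitignore, so common junk directories
    are filtered out manually."""
    skip = {".git", ".venv", "node_modules", "__pycache__"}
    return sorted(
        str(p.relative_to(root))
        for p in Path(root).rglob("*.py")
        if not any(part in skip for part in p.parts)
    )

# Demo on a throwaway tree: one real module plus bytecode cache noise
with tempfile.TemporaryDirectory() as d:
    Path(d, "app.py").write_text("print('hi')\n")
    Path(d, "__pycache__").mkdir()
    Path(d, "__pycache__", "app.cpython-310.pyc").touch()
    files = python_files(d)
    print(files)
```

In CI, `git ls-files` remains the better choice because it sees exactly what is committed.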
CONTRIBUTION.md CHANGED
@@ -1,36 +1,28 @@
- # Contributing to The Project
-
- We welcome contributions from everyone. To make the process as smooth as possible, please follow the guidelines below.
-
- ## How to Contribute
-
- ### Code Contributions
-
- 1. **Creating a Branch**: For any code contribution, start by creating a branch from the main project. The branch should be named using the pattern `#issue-id short-name-to-describe-what-is-added`. This helps in linking the contributions directly to the issues they are meant to resolve.
-
- 2. **Pull Requests**: Once you've made your changes, create a pull request (PR). Please fill out the pull request template with all the required information. This includes a summary of your changes and why they are necessary.
-
- 3. **Review Process**: Every code contribution requires a review. A minimum of one maintainer must review and approve your contribution before it can be merged. This ensures quality and adherence to the project's standards. Please ensure your code is well-documented through comments and, if necessary, additional documentation files.
-
- 4. **Testing**: Your code must be tested to ensure it works as expected and does not introduce new issues.
-
- ### Reporting Issues
-
- When reporting issues, please use the provided issue template. Your report should include:
-
- - A clear, descriptive title
- - A detailed description of the issue
- - Steps to reproduce the issue
- - Logs, if applicable
- - Screenshots, if applicable
-
- This information is crucial in diagnosing and fixing the issue you're experiencing.
-
- ### Suggestions
-
- We're always looking for new ideas to improve our project. If you have a suggestion, please:
-
- - Clearly describe your suggestion, including the purpose and intended outcome.
- - Explain why you believe this change would be beneficial to the project.
-
- We appreciate your contributions and look forward to collaborating with you!
+ # Pull Request Template
+
+ ## Description
+ Please include a brief description of the changes introduced by this PR.
+
+ ## Related Issue(s)
+ - If this PR addresses a particular issue, please reference it here using GitHub's linking syntax, e.g., "Fixes #123".
+ - If there's no related issue, briefly explain the motivation behind these changes.
+
+ ## Changes Made
+ Please provide a list of the changes made in this PR.
+
+ ## Screenshots (if applicable)
+ If the changes include UI updates or visual changes, please attach relevant screenshots here.
+
+ ## Checklist
+ - [ ] I have tested my changes locally and ensured that they work as expected.
+ - [ ] I have updated the documentation (if applicable).
+ - [ ] My code follows the project's coding conventions and style guidelines.
+ - [ ] I have added appropriate test cases (if applicable).
+ - [ ] I have reviewed my own code to ensure its quality.
+
+ ## Additional Notes
+ Add any additional notes or context about this PR here.
+
+ ## Reviewer(s)
+ - @reviewer1
+ - @reviewer2
Core_Advanced_RAG_components.ipynb ADDED
@@ -0,0 +1,389 @@
+ {
+   "nbformat": 4,
+   "nbformat_minor": 0,
+   "metadata": {
+     "colab": {
+       "provenance": [],
+       "gpuType": "T4",
+       "authorship_tag": "ABX9TyNTRxOWLfv3tkZHe66pK63p",
+       "include_colab_link": true
+     },
+     "kernelspec": {
+       "name": "python3",
+       "display_name": "Python 3"
+     },
+     "language_info": {
+       "name": "python"
+     },
+     "accelerator": "GPU"
+   },
+   "cells": [
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "view-in-github",
+         "colab_type": "text"
+       },
+       "source": [
+         "<a href=\"https://colab.research.google.com/github/almutareb/advanced-rag-system-anatomy/blob/main/Core_Advanced_RAG_components.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+       ]
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "Install requirements"
+       ],
+       "metadata": {
+         "id": "Hz8JZq6Ob8rt"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "import sys\n",
+         "import os\n",
+         "!pip install -qU langchain langchain-community --no-warn-script-location > /dev/null\n",
+         "!pip install -qU beautifulsoup4 --no-warn-script-location > /dev/null\n",
+         "!pip install -qU faiss-cpu --no-warn-script-location > /dev/null\n",
+         "# use the GPU-optimized version of FAISS for better performance\n",
+         "#!pip install -qU faiss-gpu --no-warn-script-location > /dev/null\n",
+         "!pip install -qU chromadb --no-warn-script-location > /dev/null\n",
+         "!pip install -qU validators --no-warn-script-location > /dev/null\n",
+         "!pip install -qU sentence_transformers typing-extensions==4.8.0 unstructured --no-warn-script-location > /dev/null\n",
+         "!pip install -qU gradio==3.48.0 --no-warn-script-location > /dev/null"
+       ],
+       "metadata": {
+         "id": "SXTdFuTvboyV"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "Download Documents"
+       ],
+       "metadata": {
+         "id": "pETUBgFAk4Fx"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n",
+         "from bs4 import BeautifulSoup as Soup\n",
+         "\n",
+         "# List of URLs to scrape\n",
+         "urls = [\"https://langchain-doc.readthedocs.io/en/latest\",\n",
+         "        \"https://python.langchain.com/docs/get_started\"]\n",
+         "\n",
+         "# Initialize an empty list to store the documents\n",
+         "docs = []\n",
+         "# Looping through each URL in the list - this could take some time!\n",
+         "for url in urls:\n",
+         "    # max_depth set to 4 for demo purposes; increase for real-scenario results, e.g. to at least 5\n",
+         "    loader = RecursiveUrlLoader(url=url, max_depth=4, extractor=lambda x: Soup(x, \"html.parser\").text)\n",
+         "    docs.extend(loader.load())\n",
+         "print(f'Downloaded a total of {len(docs)} documents')"
+       ],
+       "metadata": {
+         "id": "eVav9lGgk3X3"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "Chunking documents"
+       ],
+       "metadata": {
+         "id": "0iurKj94w1jm"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+         "import time\n",
+         "\n",
+         "text_splitter = RecursiveCharacterTextSplitter(\n",
+         "    chunk_size=500,   # The size of each text chunk\n",
+         "    chunk_overlap=50, # Overlap between chunks to ensure continuity\n",
+         ")\n",
+         "\n",
+         "# Stage one: read all the docs, split them into chunks.\n",
+         "st = time.time() # Start time for performance measurement\n",
+         "print('Loading documents ...')\n",
+         "\n",
+         "# Split each document into chunks using the configured text splitter\n",
+         "chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])\n",
+         "\n",
+         "et = time.time() - st # Calculate time taken for splitting\n",
+         "print(f'created {len(chunks)} chunks in {et} seconds.')"
+       ],
+       "metadata": {
+         "id": "zSZJQeA_w2B3"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "Build VectorStore: Vectorization"
+       ],
+       "metadata": {
+         "id": "oQGtHuTxkmFq"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "from langchain.vectorstores import FAISS\n",
+         "from langchain.vectorstores.utils import filter_complex_metadata\n",
+         "from langchain.embeddings import HuggingFaceEmbeddings\n",
+         "\n",
+         "# Path for saving the FAISS index\n",
+         "FAISS_INDEX_PATH = \"./vectorstore/lc-faiss-multi-mpnet-500\"\n",
+         "\n",
+         "\n",
+         "# Stage two: embed the docs.\n",
+         "# use the multi-qa-mpnet-base-dot-v1 sentence transformer to convert text chunks into vectors for the vector store\n",
+         "model_name = \"sentence-transformers/multi-qa-mpnet-base-dot-v1\"\n",
+         "\n",
+         "# use the GPU for faster processing\n",
+         "#model_kwargs = {\"device\": \"cuda\"}\n",
+         "\n",
+         "# Initialize HuggingFace embeddings with the specified model\n",
+         "embeddings = HuggingFaceEmbeddings(\n",
+         "    model_name=model_name,\n",
+         "#   model_kwargs=model_kwargs # uncomment when using a GPU, like T4 - requires extended RAM!\n",
+         ")\n",
+         "\n",
+         "print('Loading chunks into vector store ...')\n",
+         "st = time.time() # Start time for performance measurement\n",
+         "\n",
+         "# Create a FAISS vector store from the document chunks and save it locally\n",
+         "db = FAISS.from_documents(filter_complex_metadata(chunks), embeddings)\n",
+         "# persist vectorstore\n",
+         "db.save_local(FAISS_INDEX_PATH)\n",
+         "\n",
+         "et = time.time() - st\n",
+         "print(f'Time taken: {et} seconds.')"
+       ],
+       "metadata": {
+         "id": "qu6sDsq6c9fg"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "Load LLM"
+       ],
+       "metadata": {
+         "id": "updDdzwj0RdJ"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "from dotenv import load_dotenv\n",
+         "# HF libraries\n",
+         "from langchain.llms import HuggingFaceHub\n",
+         "\n",
+         "# Load environment variables from a .env file\n",
+         "CONFIG = load_dotenv(\".env\")\n",
+         "\n",
+         "# Retrieve the Hugging Face API token from environment variables\n",
+         "HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')\n",
+         "\n",
+         "# Load the model from the Hugging Face Hub\n",
+         "model_id = HuggingFaceHub(repo_id=\"mistralai/Mistral-7B-Instruct-v0.1\", model_kwargs={\n",
+         "    \"temperature\": 0.1,\n",
+         "    \"max_new_tokens\": 1024,\n",
+         "    \"repetition_penalty\": 1.2,\n",
+         "    \"return_full_text\": False\n",
+         "})\n"
+       ],
+       "metadata": {
+         "id": "GlnNrNdbg2E6"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "Retriever"
+       ],
+       "metadata": {
+         "id": "2m3BIm090jtr"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "from langchain.embeddings import HuggingFaceHubEmbeddings\n",
+         "# vector store\n",
+         "from langchain.vectorstores import FAISS\n",
+         "\n",
+         "# Load and initialize the vector store as a retriever for the RAG pipeline\n",
+         "db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)\n",
+         "\n",
+         "retriever = db.as_retriever()"
+       ],
+       "metadata": {
+         "id": "jzqPsuds0kSs"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "Template and Chat logic"
+       ],
+       "metadata": {
+         "id": "Bld8lOEv0Uq-"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "# retrieval chain\n",
+         "from langchain.chains import RetrievalQA\n",
+         "# prompt template\n",
+         "from langchain.prompts import PromptTemplate\n",
+         "from langchain.memory import ConversationBufferMemory\n",
+         "\n",
+         "\n",
+         "global qa\n",
+         "template = \"\"\"\n",
+         "You are the friendly documentation buddy Arti, who helps novice programmers in using LangChain with simple explanations and examples.\\\n",
+         " Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question:\n",
+         "------\n",
+         "<ctx>\n",
+         "{context}\n",
+         "</ctx>\n",
+         "------\n",
+         "<hs>\n",
+         "{history}\n",
+         "</hs>\n",
+         "------\n",
+         "{question}\n",
+         "Answer:\n",
+         "\"\"\"\n",
+         "# Create a PromptTemplate object with specified input variables and the defined template\n",
+         "prompt = PromptTemplate.from_template(\n",
+         "    template=template,\n",
+         ")\n",
+         "prompt.format(context=\"context\", history=\"history\", question=\"question\")\n",
+         "\n",
+         "# Create a memory buffer to manage conversation history\n",
+         "memory = ConversationBufferMemory(memory_key=\"history\", input_key=\"question\")\n",
+         "\n",
+         "# Initialize the RetrievalQA object with the specified model,\n",
+         "# retriever, and additional configurations\n",
+         "qa = RetrievalQA.from_chain_type(llm=model_id, chain_type=\"stuff\", retriever=retriever, verbose=True, return_source_documents=True, chain_type_kwargs={\n",
+         "    \"verbose\": True,\n",
+         "    \"memory\": memory,\n",
+         "    \"prompt\": prompt\n",
+         "})"
+       ],
+       "metadata": {
+         "id": "K255Ldxq0Xg6"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "markdown",
+       "source": [
+         "UI - Gradio"
+       ],
+       "metadata": {
+         "id": "pA5d0LL2kObx"
+       }
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "history = []\n",
+         "query = \"draft a function to calculate a mxn matrix\"\n",
+         "question = query\n",
+         "response = qa({\"query\": query, \"history\": history, \"question\": question})\n",
+         "print(*response)"
+       ],
+       "metadata": {
+         "id": "bKeoyhXPrQ2C"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "print(response['result'])"
+       ],
+       "metadata": {
+         "id": "78wRMjjn0cl3"
+       },
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "source": [
+         "import gradio as gr\n",
+         "\n",
+         "# Function to add a new input to the chat history\n",
+         "def add_text(history, text):\n",
+         "    # Append the new text to the history with a placeholder for the response\n",
+         "    history = history + [(text, None)]\n",
+         "    return history, \"\"\n",
+         "\n",
+         "# Function representing the bot's response mechanism\n",
+         "def bot(history):\n",
+         "    response = infer(history[-1][0], history)\n",
+         "    history[-1][1] = response['result']\n",
+         "    return history\n",
+         "\n",
+         "# Function to infer the response using the RAG model\n",
+         "def infer(question, history):\n",
+         "    query = question\n",
+         "    result = qa({\"query\": query, \"history\": history, \"question\": question})\n",
+         "    return result\n",
+         "\n",
+         "# Building the Gradio interface\n",
+         "with gr.Blocks() as demo:\n",
+         "    with gr.Column(elem_id=\"col-container\"):\n",
+         "        chatbot = gr.Chatbot([], elem_id=\"chatbot\")\n",
+         "        clear = gr.Button(\"Clear\")\n",
+         "\n",
+         "        # Create a row for the question input\n",
+         "        with gr.Row():\n",
+         "            question = gr.Textbox(label=\"Question\", placeholder=\"Type your question and hit Enter \")\n",
+         "\n",
+         "    # Define the action when the question is submitted\n",
+         "    question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(\n",
+         "        bot, chatbot, chatbot\n",
+         "    )\n",
+         "\n",
+         "    # Define the action for the clear button\n",
+         "    clear.click(lambda: None, None, chatbot, queue=False)\n",
+         "\n",
+         "# Launch the Gradio demo interface\n",
+         "demo.launch(share=False)"
+       ],
+       "metadata": {
+         "id": "OHVkFa6MkCir"
+       },
+       "execution_count": null,
+       "outputs": []
+     }
+   ]
+ }
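The notebook's chunking cell splits each document into 500-character chunks with a 50-character overlap. Stripped of LangChain, the sliding-window idea can be sketched in plain Python (a simplification: the real `RecursiveCharacterTextSplitter` also prefers splitting at separators such as paragraph and sentence boundaries):

```python
def chunk_text(text: str, chunk_size: int = 500, chunk_overlap: int = 50) -> list[str]:
    """Fixed-size overlapping windows over a string.

    Simplified stand-in for RecursiveCharacterTextSplitter: the real
    splitter additionally tries to break at natural separators."""
    if chunk_overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk size")
    step = chunk_size - chunk_overlap  # how far the window advances each time
    return [text[i:i + chunk_size] for i in range(0, max(len(text) - chunk_overlap, 1), step)]

chunks = chunk_text("abcdefghij" * 100, chunk_size=500, chunk_overlap=50)
print(len(chunks), len(chunks[0]))  # → 3 500
```

The overlap ensures that a sentence straddling a chunk boundary is fully contained in at least one of the two neighboring chunks, which matters for retrieval quality.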
README.md CHANGED
@@ -1,32 +1,33 @@
- # mail_automation_wf
- <img width="424" alt="image" src="https://github.com/almutareb/mail_automation_wf/assets/104657679/9fc2d76a-03fd-42f3-904f-0f6386d37091">
-
- ## Overview
- This project aims to automate the processing of incoming emails by performing various tasks, including fetching emails, recognizing entities, classifying and summarizing content, enriching data, and drafting responses. This README provides detailed information on the project's goals, agents, tasks, and overall workflow.
-
- ### Table of Contents
- - Goals
- - Agents and Tasks
- - Email Fetcher
- - Attachment Downloader
- - Attachment Processor
- - Entity Recognizer
- - Classifier
- - Data Enricher
- - Data Checker
- - Historical Data Fetcher
- - Email Writer
- - Quality Checker
- - Email Saver
- - Workflow
- - Installation
- - Configuration
- - Usage
- - Contributing
- - License
-
+ # Anatomy of Advanced Enterprise RAG Systems
+
+ This repository accompanies the blog series "The Anatomy of Advanced Enterprise RAG Systems" and provides a hands-on learning experience for building sophisticated RAG systems. Dive deep into each component, from setup and evaluation to security and multi-agent interactions.
+
+ Explore these key topics:
+
+ - Test Setup and Evaluation Metrics: Learn how to assess the performance and effectiveness of your RAG system.
+ - Data Preparation and Management: Discover techniques for organizing and optimizing your knowledge base.
+ - User Input Processing: Understand how to handle diverse user queries and extract relevant information.
+ - Retrieval System: Unleash the power of retrieving relevant passages from your knowledge base.
+ - Information Processing and Generation: Craft accurate and informative responses using state-of-the-art techniques.
+ - Feedback and Continuous Improvement: Enhance your RAG system over time using user feedback and data analysis.
+ - Multi-agents and Agent-services: Explore advanced architectures for distributed and collaborative RAG systems.
+ - Monitoring and Security: Ensure the robustness and trustworthiness of your RAG system with proper monitoring and security practices.
+
+ What you'll find here:
+
+ - Code examples: Implementations of key concepts from each topic, ready to use and adapt.
+ - Data samples: Pre-prepared data sets for experimentation and testing.
+ - Additional resources: Links to relevant articles, libraries, and tools to deepen your understanding.
+
+ Getting started:
+
+ - Clone this repository: git clone https://github.com/<username>/advanced-enterprise-rag-systems.git
+ - Follow the instructions in each topic directory.
+
+ Contributing:
+
+ We welcome your contributions! Share your expertise, improve existing code examples, or add new ones. Submit a pull request to share your valuable additions.
+
+ License:
+
+ This project is licensed under the MIT License: LICENSE.
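The "Retrieval System" topic above centers on nearest-neighbor search over embedding vectors. A toy dot-product/cosine retriever (a pure-Python stand-in for the FAISS index used elsewhere in this commit; the document ids and vectors are invented for illustration) can be sketched as:

```python
import math

def cosine(a: list[float], b: list[float]) -> float:
    """Cosine similarity between two vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b)))

def retrieve(query_vec: list[float], index: list[tuple[str, list[float]]], k: int = 2) -> list[str]:
    """Return the ids of the top-k documents by cosine similarity to the query."""
    scored = sorted(index, key=lambda item: cosine(query_vec, item[1]), reverse=True)
    return [doc_id for doc_id, _ in scored[:k]]

# Tiny 2-dimensional index; real embeddings have hundreds of dimensions
index = [("doc-a", [1.0, 0.0]), ("doc-b", [0.6, 0.8]), ("doc-c", [0.0, 1.0])]
result = retrieve([1.0, 0.2], index, k=2)
print(result)  # → ['doc-a', 'doc-b']
```

FAISS does the same ranking, but with approximate-nearest-neighbor data structures that scale to millions of vectors.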
core-langchain-rag.py ADDED
@@ -0,0 +1,267 @@
+ # Importing necessary libraries
+ import sys
+ import os
+ import time
+
+ # # Importing RecursiveUrlLoader for web scraping and BeautifulSoup for HTML parsing
+ # from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
+ # from bs4 import BeautifulSoup as Soup
+ # import mimetypes
+
+ # # List of URLs to scrape
+ # urls = ["https://langchain-doc.readthedocs.io/en/latest"]
+
+ # # Initialize an empty list to store the documents
+ # docs = []
+
+ # # Looping through each URL in the list - this could take some time!
+ # stf = time.time() # Start time for performance measurement
+ # for url in urls:
+ #     try:
+ #         st = time.time() # Start time for performance measurement
+ #         # Create a RecursiveUrlLoader instance with a specified URL and depth
+ #         # The extractor function uses BeautifulSoup to parse the HTML content and extract text
+ #         loader = RecursiveUrlLoader(url=url, max_depth=5, extractor=lambda x: Soup(x, "html.parser").text)
+
+ #         # Load the documents from the URL and extend the docs list
+ #         docs.extend(loader.load())
+
+ #         et = time.time() - st # Calculate time taken for downloading
+ #         print(f'Time taken for downloading documents from {url}: {et} seconds.')
+ #     except Exception as e:
+ #         # Print an error message if there is an issue with loading or parsing the URL
+ #         print(f"Failed to load or parse the URL {url}. Error: {e}", file=sys.stderr)
+ # etf = time.time() - stf # Calculate total download time
+ # print(f'Total time taken for downloading {len(docs)} documents: {etf} seconds.')
+
+ # # Import necessary modules for text splitting and vectorization
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
+ # import time
+ # from langchain_community.vectorstores import FAISS
+ # from langchain.vectorstores.utils import filter_complex_metadata
+ # from langchain_community.embeddings import HuggingFaceEmbeddings
+
+ # # Configure the text splitter
+ # text_splitter = RecursiveCharacterTextSplitter(
+ #     separators=["\n\n", "\n", "(?<=\. )", " ", ""], # Define the separators for splitting text
+ #     chunk_size=500,    # The size of each text chunk
+ #     chunk_overlap=50,  # Overlap between chunks to ensure continuity
+ #     length_function=len, # Function to determine the length of each chunk
+ # )
+
+ # try:
+ #     # Stage one: Splitting the documents into chunks for vectorization
+ #     st = time.time() # Start time for performance measurement
+ #     print('Loading documents and creating chunks ...')
+ #     # Split each document into chunks using the configured text splitter
+ #     chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
+ #     et = time.time() - st # Calculate time taken for splitting
+ #     print(f'created {len(chunks)} chunks')
+ #     print(f'Time taken for document chunking: {et} seconds.')
+ # except Exception as e:
+ #     print(f"Error during document chunking: {e}", file=sys.stderr)
+
+ # # Path for saving the FAISS index
+ # FAISS_INDEX_PATH = "./vectorstore/lc-faiss-multi-mpnet-500"
+
+ # try:
+ #     # Stage two: Vectorization of the document chunks
+ #     model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1" # Model used for embedding
+
+ #     # Initialize HuggingFace embeddings with the specified model
+ #     embeddings = HuggingFaceEmbeddings(model_name=model_name)
+
+ #     print('Loading chunks into vector store ...')
+ #     st = time.time() # Start time for performance measurement
+ #     # Create a FAISS vector store from the document chunks and save it locally
+ #     db = FAISS.from_documents(filter_complex_metadata(chunks), embeddings)
+ #     db.save_local(FAISS_INDEX_PATH)
+ #     et = time.time() - st # Calculate time taken for vectorization
+ #     print(f'Time taken for vectorization and saving: {et} seconds.')
+ # except Exception as e:
+ #     print(f"Error during vectorization or FAISS index saving: {e}", file=sys.stderr)
+
+ # alternatively, download a prepared vectorized index from S3 and load the index into the vector store
+ # Import necessary libraries for AWS S3 interaction, file handling, and FAISS vector stores
+ import boto3
+ from botocore import UNSIGNED
+ from botocore.client import Config
+ import zipfile
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from dotenv import load_dotenv
+
+ # Load environment variables from a .env file
+ config = load_dotenv(".env")
+
+ # Retrieve the Hugging Face API token from environment variables
+ HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+ S3_LOCATION = os.getenv("S3_LOCATION")
+
+ try:
+     # Initialize an S3 client with unsigned configuration for public access
+     s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
+
+     # Define the FAISS index path and the destination for the downloaded file
+     FAISS_INDEX_PATH = './vectorstore/lc-faiss-multi-mpnet-500-markdown'
+     VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
+
+     # Download the pre-prepared vectorized index from the S3 bucket
+     print("Downloading the pre-prepared vectorized index from S3...")
+     s3.download_file(S3_LOCATION, 'vectorstores/lc-faiss-multi-mpnet-500-markdown.zip', VS_DESTINATION)
+
+     # Extract the downloaded zip file
+     with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
+         zip_ref.extractall('./vectorstore/')
+     print("Download and extraction completed.")
+
+ except Exception as e:
+     print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
+
+ # Define the model name for embeddings
+ model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
+
+ try:
+     # Initialize HuggingFace embeddings with the specified model
+     embeddings = HuggingFaceEmbeddings(model_name=model_name)
+
+     # Load the local FAISS index with the specified embeddings
+     db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
+     print("FAISS index loaded successfully.")
+ except Exception as e:
+     print(f"Error during FAISS index loading: {e}", file=sys.stderr)
+
+ # Import necessary modules for environment variable management and HuggingFace integration
+ from langchain_huggingface import HuggingFaceEndpoint
+
+ # Initialize the vector store as a retriever for the RAG pipeline
+ retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 3, 'lambda_mult': 0.25})
+
+ try:
+     # Load the model from the Hugging Face Hub
+     model_id = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+         temperature=0.1,        # Controls randomness in response generation (lower value means less random)
+         max_new_tokens=1024,    # Maximum number of new tokens to generate in responses
+         repetition_penalty=1.2, # Penalty for repeating the same words (higher value increases penalty)
+         return_full_text=False  # If False, only the newly generated text is returned; if True, the input is included as well
+     )
+     print("Model loaded successfully from Hugging Face Hub.")
+ except Exception as e:
+     print(f"Error loading model from Hugging Face Hub: {e}", file=sys.stderr)
+
+
+
+ # Importing necessary modules for retrieval-based question answering and prompt handling
+ from langchain.chains import RetrievalQA
+ from langchain.prompts import PromptTemplate
+ from langchain.memory import ConversationBufferMemory
+
+ # Declare a global variable 'qa' for the retrieval-based question answering system
+ global qa
+
+ # Define a prompt template for guiding the model's responses
+ template = """
+ You are the friendly documentation buddy Arti, if you don't know the answer say 'I don't know' and don't make things up.\
+  Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question:
+ ------
+ <ctx>
+ {context}
+ </ctx>
+ ------
+ <hs>
+ {history}
+ </hs>
+ ------
+ {question}
+ Answer:
+ """
+
+ # Create a PromptTemplate object with specified input variables and the defined template
+ prompt = PromptTemplate.from_template(
+     #input_variables=["history", "context", "question"], # Variables to be included in the prompt
+     template=template, # The prompt template as defined above
+ )
+ prompt.format(context="context", history="history", question="question")
+ # Create a memory buffer to manage conversation history
+ memory = ConversationBufferMemory(
+     memory_key="history", # Key for storing the conversation history
+     input_key="question"  # Key for the input question
+ )
+
+ # Initialize the RetrievalQA object with the specified model, retriever, and additional configurations
+ qa = RetrievalQA.from_chain_type(
+     llm=model_id,                 # Language model loaded from Hugging Face Hub
+     retriever=retriever,          # The vector store retriever initialized earlier
+     return_source_documents=True, # Option to return source documents along with responses
+     chain_type_kwargs={
+         "verbose": True,  # Enables verbose output for debugging and analysis
+         "memory": memory, # Memory buffer for managing conversation history
+         "prompt": prompt  # Prompt template for guiding the model's responses
+     }
+ )
+
+ # Import Gradio for UI, along with other necessary libraries
+ import gradio as gr
+ import random
+ import time
+
+ # Function to add a new input to the chat history
+ def add_text(history, text):
+     # Append the new text to the history with a placeholder for the response
+     history = history + [(text, None)]
+     return history, ""
+
+ # Function representing the bot's response mechanism
215
+ def bot(history):
216
+ # Obtain the response from the 'infer' function using the latest input
217
+ response = infer(history[-1][0], history)
218
+ sources = [doc.metadata.get("source") for doc in response['source_documents']]
219
+ src_list = '\n'.join(sources)
220
+ print_this = response['result'] + "\n\n\n Sources: \n\n\n" + src_list
221
+
222
+
223
+ history[-1][1] = print_this #response['answer']
224
+ # Update the history with the bot's response
225
+ #history[-1][1] = response['result']
226
+ return history
227
+
228
+ # Function to infer the response using the RAG model
229
+ def infer(question, history):
230
+ # Use the question and history to query the RAG model
231
+ result = qa({"query": question, "history": history, "question": question})
232
+ return result
233
+
234
+ # CSS styling for the Gradio interface
235
+ css = """
236
+ #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
237
+ """
238
+
239
+ # HTML content for the Gradio interface title
240
+ title = """
241
+ <div style="text-align: center;max-width: 700px;">
242
+ <h1>Chat with your Documentation</h1>
243
+ <p style="text-align: center;">Chat with LangChain Documentation, <br />
244
+ You can ask questions about the LangChain docu ;)</p>
245
+ </div>
246
+ """
247
+
248
+ # Building the Gradio interface
249
+ with gr.Blocks(css=css) as demo:
250
+ with gr.Column(elem_id="col-container"):
251
+ gr.HTML(title) # Add the HTML title to the interface
252
+ chatbot = gr.Chatbot([], elem_id="chatbot") # Initialize the chatbot component
253
+ clear = gr.Button("Clear") # Add a button to clear the chat
254
+
255
+ # Create a row for the question input
256
+ with gr.Row():
257
+ question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
258
+
259
+ # Define the action when the question is submitted
260
+ question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
261
+ bot, chatbot, chatbot
262
+ )
263
+ # Define the action for the clear button
264
+ clear.click(lambda: None, None, chatbot, queue=False)
265
+
266
+ # Launch the Gradio demo interface
267
+ demo.launch(share=False)
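The retriever in app.py is configured for MMR (maximal marginal relevance) with `k=3` and `lambda_mult=0.25`, i.e. it weights diversity heavily over raw relevance. A minimal, dependency-free sketch of the MMR idea — illustrative only, with made-up toy vectors and plain dot products rather than FAISS's actual implementation:

```python
# Minimal sketch of Maximal Marginal Relevance (MMR) re-ranking.
# Illustrative only -- FAISS/LangChain implement this internally.
def dot(a, b):
    return sum(x * y for x, y in zip(a, b))

def mmr(query, candidates, k=3, lambda_mult=0.25):
    """Pick k candidate indices balancing query relevance against redundancy."""
    selected = []
    remaining = list(range(len(candidates)))
    while remaining and len(selected) < k:
        def score(i):
            relevance = dot(query, candidates[i])
            # Similarity to the most similar already-selected candidate
            redundancy = max((dot(candidates[i], candidates[j]) for j in selected), default=0.0)
            # lambda_mult=1 -> pure relevance, lambda_mult=0 -> pure diversity
            return lambda_mult * relevance - (1 - lambda_mult) * redundancy
        best = max(remaining, key=score)
        selected.append(best)
        remaining.remove(best)
    return selected

query = [1.0, 0.0]
docs = [[1.0, 0.0], [0.99, 0.1], [0.0, 1.0]]  # two near-duplicates, one orthogonal doc
print(mmr(query, docs, k=2))  # -> [0, 2]: skips the near-duplicate in favor of diversity
```

With `lambda_mult=0.25` as in the retriever above, the near-duplicate document loses out to the orthogonal one; with `lambda_mult=1.0` the ranking collapses to plain relevance order.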
docs/advanced_rag_architecture.drawio ADDED
@@ -0,0 +1,115 @@
+ <mxfile host="app.diagrams.net" modified="2024-02-02T10:57:09.662Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0" etag="jBjrxQrE8FMZUdqYmkDs" version="22.1.21" type="github">
+ <diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">
+ <mxGraphModel dx="1434" dy="774" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
+ <root>
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-0" />
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-1" parent="WIyWlLk6GJQsqaUBKTNV-0" />
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-59" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-4" target="7HGE-dyt3ShhVV6eNgTS-22">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-4" value="Retrieval System" style="swimlane;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;startSize=23;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="280" y="360" width="290" height="280" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-7" value="&lt;div&gt;Indices&lt;/div&gt;" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.database;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
+ <mxGeometry x="44" y="40" width="60" height="60" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-32" value="Re-ranking" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
+ <mxGeometry x="14" y="130" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-34" value="Hypothetical Questions and HyDE" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
+ <mxGeometry x="160" y="130" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-37" value="Fine-tuning Embeddings" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
+ <mxGeometry x="14" y="195" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-33" value="Hyperparameter Tuning" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
+ <mxGeometry x="160" y="50" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-56" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=1;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-14" target="7HGE-dyt3ShhVV6eNgTS-19">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-14" value="&lt;div&gt;Data Preparation and Management&lt;/div&gt;" style="swimlane;whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="40" y="40" width="360" height="240" as="geometry">
+ <mxRectangle x="410" y="40" width="240" height="30" as="alternateBounds" />
+ </mxGeometry>
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-23" value="Chunking &amp;amp; Vectorization" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
+ <mxGeometry x="14" y="50" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-24" value="Metadata and Summaries" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
+ <mxGeometry x="161" y="50" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-25" value="User Profile Management" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
+ <mxGeometry x="14" y="190" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-26" value="Data Cleaning" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
+ <mxGeometry x="14" y="120" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-31" value="Complex Formats Handling" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
+ <mxGeometry x="161" y="120" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-57" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-19" target="7HGE-dyt3ShhVV6eNgTS-4">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-19" value="&lt;div&gt;User Input processing&lt;/div&gt;" style="swimlane;horizontal=0;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="40" y="360" width="200" height="280" as="geometry" />
+ </mxCell>
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-3" value="User Authentication" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" parent="7HGE-dyt3ShhVV6eNgTS-19" vertex="1">
+ <mxGeometry x="40" y="20" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-7" value="Query Rewriter" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" parent="7HGE-dyt3ShhVV6eNgTS-19" vertex="1">
+ <mxGeometry x="40" y="80" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-8" value="Input Guardrail" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-19">
+ <mxGeometry x="40" y="140" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-5" value="chat history&amp;nbsp; " style="strokeWidth=2;html=1;shape=mxgraph.flowchart.multi-document;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-19">
+ <mxGeometry x="56" y="210" width="88" height="60" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-60" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0;exitDx=0;exitDy=0;entryX=1;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-22" target="7HGE-dyt3ShhVV6eNgTS-44">
+ <mxGeometry relative="1" as="geometry">
+ <mxPoint x="790" y="280" as="targetPoint" />
+ </mxGeometry>
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-22" value="Information Processing and Generation" style="swimlane;horizontal=0;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="600" y="360" width="200" height="280" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-9" value="Response Generation" style="rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
+ <mxGeometry x="40" y="20" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-28" value="Output Guardrails and Moderation" style="rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
+ <mxGeometry x="40" y="80" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-29" value="Caching" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.multi-document;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
+ <mxGeometry x="56" y="140" width="88" height="60" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-30" value="Personalization and Customization" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
+ <mxGeometry x="40" y="215" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-61" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=1;entryY=0;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-44" target="7HGE-dyt3ShhVV6eNgTS-14">
+ <mxGeometry relative="1" as="geometry">
+ <Array as="points">
+ <mxPoint x="440" y="40" />
+ <mxPoint x="440" y="40" />
+ </Array>
+ </mxGeometry>
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-44" value="Feedback and Continuous Improvement" style="swimlane;whiteSpace=wrap;html=1;startSize=23;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="480" y="40" width="320" height="240" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-46" value="Data Refinement" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
+ <mxGeometry x="170" y="50" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-47" value="System Monitoring" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
+ <mxGeometry x="170" y="120" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-48" value="Generation Evaluation" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
+ <mxGeometry x="20" y="120" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="7HGE-dyt3ShhVV6eNgTS-49" value="User Feedback" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
+ <mxGeometry x="20" y="50" width="120" height="40" as="geometry" />
+ </mxCell>
+ </root>
+ </mxGraphModel>
+ </diagram>
+ </mxfile>
docs/data_flow_diagram.drawio.png ADDED
docs/template.md ADDED
@@ -0,0 +1,16 @@
+ # Template
+
+ 1. **Architecture of advanced RAG**
+ 2. **Test setup and evaluation metrics**
+ 3. **Data preparation (vectorization & chunking)**
+ 4. **Search indexing**
+ 5. **Query transformation**
+ 6. **Chat logic and query routing**
+ 7. **Multi-agents and agent services**
+ 8. **Monitoring responses and adding security**
+
+ ## Additional Resources
+
+ [Enterprise RAG](https://www.rungalileo.io/blog/mastering-rag-how-to-architect-an-enterprise-rag-system)
+
+ [Advanced RAG](https://medium.com/towards-artificial-intelligence/advanced-rag-techniques-an-illustrated-overview-04d193d8fec6)
docs/workflow-advanced-rag.drawio ADDED
@@ -0,0 +1,83 @@
+ <mxfile host="app.diagrams.net" modified="2024-02-02T11:21:08.029Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0" etag="EvpGiXuqtWkE4FAqL8_g" version="22.1.21" type="github">
+ <diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">
+ <mxGraphModel dx="1434" dy="774" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
+ <root>
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-0" />
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-1" parent="WIyWlLk6GJQsqaUBKTNV-0" />
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-1" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-3" target="eFb6EC-VP60E3mpf6WAh-0">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-3" value="User Authentication and Input" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#d5e8d4;strokeColor=#82b366;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
+ <mxGeometry x="24" y="80" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-12" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-7" target="WIyWlLk6GJQsqaUBKTNV-11">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-7" value="Query Processing" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#d5e8d4;strokeColor=#82b366;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
+ <mxGeometry x="180" y="160" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-11" target="eFb6EC-VP60E3mpf6WAh-3">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="WIyWlLk6GJQsqaUBKTNV-11" value="Data Preparation and Management" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#ffe6cc;strokeColor=#d79b00;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
+ <mxGeometry x="330" y="160" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-0" target="WIyWlLk6GJQsqaUBKTNV-7">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-0" value="Input Guardrails" style="whiteSpace=wrap;html=1;rounded=1;glass=0;strokeWidth=1;shadow=0;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="24" y="160" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-3" target="eFb6EC-VP60E3mpf6WAh-5">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-3" value="Retrieval System" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="480" y="160" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-5" target="eFb6EC-VP60E3mpf6WAh-9">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-18" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=0.75;entryY=1;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-5" target="eFb6EC-VP60E3mpf6WAh-6">
+ <mxGeometry relative="1" as="geometry">
+ <Array as="points">
+ <mxPoint x="630" y="130" />
+ <mxPoint x="450" y="130" />
+ </Array>
+ </mxGeometry>
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-5" value="Information Processing and Augmentation" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="630" y="160" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-6" value="Observability and Feedback" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="360" y="40" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.98;exitY=0.02;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.575;entryY=1;entryDx=0;entryDy=0;dashed=1;entryPerimeter=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-8" target="eFb6EC-VP60E3mpf6WAh-3">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-8" value="Caching" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.multi-document;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="376" y="240" width="88" height="60" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-9" target="eFb6EC-VP60E3mpf6WAh-6">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-9" value="Output and Response" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
+ <mxGeometry x="630" y="80" width="120" height="40" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0;exitDx=0;exitDy=0;entryX=0.325;entryY=0.975;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-0" target="eFb6EC-VP60E3mpf6WAh-6">
+ <mxGeometry relative="1" as="geometry">
+ <Array as="points">
+ <mxPoint x="144" y="130" />
+ <mxPoint x="399" y="130" />
+ </Array>
+ </mxGeometry>
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.75;exitY=1;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-7" target="eFb6EC-VP60E3mpf6WAh-8">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ <mxCell id="eFb6EC-VP60E3mpf6WAh-20" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-9" target="eFb6EC-VP60E3mpf6WAh-8">
+ <mxGeometry relative="1" as="geometry" />
+ </mxCell>
+ </root>
+ </mxGraphModel>
+ </diagram>
+ </mxfile>
rag-system-anatomy/build_vector_store.py ADDED
@@ -0,0 +1,46 @@
+ # vectorization functions
+ from langchain.vectorstores import FAISS
+ from langchain.document_loaders import ReadTheDocsLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from create_embedding import create_embeddings
+ import time
+
+ def build_vector_store(
+     docs: list,
+     db_path: str,
+     embedding_model: str,
+     new_db: bool = False,
+     chunk_size: int = 500,
+     chunk_overlap: int = 50,
+ ):
+     """Chunk and embed `docs` and persist them in a local FAISS index.
+
+     If `new_db` is True, a fresh index is created; otherwise the chunks are
+     added to the existing index at `db_path`.
+     """
+
+     if db_path is None:
+         FAISS_INDEX_PATH = "./vectorstore/py-faiss-multi-mpnet-500"
+     else:
+         FAISS_INDEX_PATH = db_path
+
+     embeddings, chunks = create_embeddings(docs, chunk_size, chunk_overlap, embedding_model)
+
+     # Load chunks into the vector store
+     print('Loading chunks into FAISS vector store ...')
+     st = time.time()
+     if new_db:
+         db_faiss = FAISS.from_documents(chunks, embeddings)
+     else:
+         # add_documents is an instance method, so load the existing index first
+         db_faiss = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
+         db_faiss.add_documents(chunks)
+     db_faiss.save_local(FAISS_INDEX_PATH)
+     et = time.time() - st
+     print(f'Time taken: {et} seconds.')
+
+     #print('Loading chunks into chroma vector store ...')
+     #st = time.time()
+     #persist_directory='./vectorstore/py-chroma-multi-mpnet-500'
+     #db_chroma = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
+     #et = time.time() - st
+     #print(f'Time taken: {et} seconds.')
+     result = f"built vector store at {FAISS_INDEX_PATH}"
+     return result
rag-system-anatomy/create_embedding.py ADDED
@@ -0,0 +1,48 @@
+ # embeddings functions
+ from langchain.vectorstores import FAISS
+ from langchain.document_loaders import ReadTheDocsLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceEmbeddings
+ import time
+ from langchain_core.documents import Document
+
+
+ def create_embeddings(
+     docs: list[Document],
+     chunk_size: int,
+     chunk_overlap: int,
+     embedding_model: str = "sentence-transformers/multi-qa-mpnet-base-dot-v1",
+ ):
+     """Given a sequence of `Document` objects, this function will
+     generate embeddings for them.
+
+     ## Arguments
+     :param docs (list[Document]) -> the documents to embed
+     :param chunk_size (int) -> the chunk size into which the documents are split
+     :param chunk_overlap (int) -> the number of characters that overlap between chunks
+     :param embedding_model (str) -> the Hugging Face model that will embed the documents
+     ## Returns
+     Tuple of embeddings and chunks
+     """
+
+     text_splitter = RecursiveCharacterTextSplitter(
+         separators=["\n\n", "\n", "(?<=\. )", " ", ""],
+         chunk_size=chunk_size,
+         chunk_overlap=chunk_overlap,
+         length_function=len,
+     )
+
+     # Stage one: read all the docs and split them into chunks.
+     st = time.time()
+     print('Loading documents ...')
+
+     chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
+     et = time.time() - st
+     print(f'Time taken: {et} seconds.')
+
+     # Stage two: embed the docs.
+     embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
+     print(f"Created a total of {len(chunks)} chunks")
+
+     return embeddings, chunks
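`create_embeddings` splits documents with a `chunk_size`/`chunk_overlap` sliding window before embedding. A plain-Python sketch of what overlapping fixed-size chunking does, stripped of LangChain (illustrative only — the real `RecursiveCharacterTextSplitter` additionally tries the separator hierarchy before falling back to hard cuts):

```python
# Simplified fixed-size chunking with overlap -- illustrative only.
# RecursiveCharacterTextSplitter first tries to split on separators
# ("\n\n", "\n", sentence ends, ...) before resorting to hard cuts like this.
def chunk_text(text, chunk_size=500, chunk_overlap=50):
    step = chunk_size - chunk_overlap  # each new chunk starts `step` chars later
    return [text[i:i + chunk_size] for i in range(0, max(len(text) - chunk_overlap, 1), step)]

chunks = chunk_text("abcdefghij", chunk_size=4, chunk_overlap=2)
print(chunks)  # -> ['abcd', 'cdef', 'efgh', 'ghij']
```

Consecutive chunks share `chunk_overlap` characters, which is what preserves context that would otherwise be cut mid-sentence at a chunk boundary.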
rag-system-anatomy/get_db_retriever.py ADDED
@@ -0,0 +1,29 @@
+ # retriever and qa_chain function
+
+ # HF libraries
+ from langchain.llms import HuggingFaceHub
+ from langchain.embeddings import HuggingFaceHubEmbeddings
+ # vector store
+ from langchain.vectorstores import FAISS
+ # retrieval chain
+ from langchain.chains import RetrievalQA
+ # prompt template
+ from langchain.prompts import PromptTemplate
+ from langchain.memory import ConversationBufferMemory
+
+
+ def get_db_retriever(vector_db: str = None):
+     model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
+     embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)
+
+     #db = Chroma(persist_directory="./vectorstore/lc-chroma-multi-mpnet-500", embedding_function=embeddings)
+     #db.get()
+     if not vector_db:
+         FAISS_INDEX_PATH = './vectorstore/py-faiss-multi-mpnet-500'
+     else:
+         FAISS_INDEX_PATH = vector_db
+     db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
+
+     retriever = db.as_retriever()
+
+     return retriever
rag-system-anatomy/load_data_from_urls.py ADDED
@@ -0,0 +1,32 @@
+ # documents loader function
+ from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
+ from bs4 import BeautifulSoup as Soup
+ from validators import url as url_validator
+ from langchain_core.documents import Document
+
+ def load_docs_from_urls(
+     urls: list = ["https://docs.python.org/3/"],
+     max_depth: int = 5,
+ ) -> list[Document]:
+     """
+     Load documents from a list of URLs.
+
+     ## Args:
+         urls (list, optional): A list of URLs to load documents from. Defaults to ["https://docs.python.org/3/"].
+         max_depth (int, optional): Maximum depth to recursively load documents from each URL. Defaults to 5.
+
+     ## Returns:
+         list: A list of documents loaded from the given URLs.
+
+     ## Raises:
+         ValueError: If any URL in the provided list is invalid.
+     """
+
+     docs = []
+     for url in urls:
+         if not url_validator(url):
+             raise ValueError(f"Invalid URL: {url}")
+         loader = RecursiveUrlLoader(url=url, max_depth=max_depth, extractor=lambda x: Soup(x, "html.parser").text)
+         docs.extend(loader.load())
+     print(f"loaded {len(docs)} pages")
+     return docs
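`load_docs_from_urls` validates each URL with the `validators` package before crawling. If that extra dependency is unwanted, a rough stdlib-only pre-flight check with `urllib.parse` could look like the sketch below (much less strict than `validators.url`, shown only to illustrate the validation step; the function name is ours):

```python
from urllib.parse import urlparse

def is_probably_valid_url(url: str) -> bool:
    """Rough stdlib-only check: require an http(s) scheme and a network location."""
    try:
        parsed = urlparse(url)
    except ValueError:
        return False
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)

print(is_probably_valid_url("https://docs.python.org/3/"))  # True
print(is_probably_valid_url("not a url"))                   # False
```

Failing fast on malformed URLs keeps the recursive loader from silently returning zero pages for a typo'd base URL.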
rag-system-anatomy/load_example_embeddings.py ADDED
@@ -0,0 +1,37 @@
+ # preprocessed vectorstore retrieval
+ import boto3
+ from botocore import UNSIGNED
+ from botocore.client import Config
+ import zipfile
+ from langchain.vectorstores import FAISS
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceEmbeddings
+
+ # anonymous (unsigned) S3 client; the demo bucket is publicly readable
+ s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
+
+ model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
+ #model_kwargs = {"device": "cuda"}
+
+ embeddings = HuggingFaceEmbeddings(
+     model_name=model_name,
+     # model_kwargs=model_kwargs
+ )
+
+ ## FAISS
+ FAISS_INDEX_PATH = './vectorstore/lc-faiss-multi-mpnet-500-markdown'
+ VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
+ s3.download_file('rad-rag-demos', 'vectorstores/lc-faiss-multi-mpnet-500-markdown.zip', VS_DESTINATION)
+ with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
+     zip_ref.extractall('./vectorstore/')
+ faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
+
+ ## Chroma DB
+ chroma_directory = "./vectorstore/lc-chroma-multi-mpnet-500-markdown"
+ VS_DESTINATION = chroma_directory + ".zip"
+ s3.download_file('rad-rag-demos', 'vectorstores/lc-chroma-multi-mpnet-500-markdown.zip', VS_DESTINATION)
+ with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
+     zip_ref.extractall('./vectorstore/')
+ chromadb = Chroma(persist_directory=chroma_directory, embedding_function=embeddings)
+ chromadb.get()
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ langchain
+ langchain-community
+ langchain_huggingface
+ beautifulsoup4
+ faiss-cpu
+ chromadb
+ validators
+ sentence_transformers
+ typing-extensions
+ unstructured
+ gradio
+ boto3
vectorstore/placeholder.txt ADDED
@@ -0,0 +1 @@
+ This file keeps the folder from being deleted for now