Spaces:
Sleeping
Sleeping
Asaad Almutareb
commited on
Commit
·
f66560f
1
Parent(s):
8145c48
initial advanced rag chain
Browse files- .devcontainer/Dockerfile +15 -0
- .devcontainer/devcontainer.json +32 -0
- .github/dependabot.yml +12 -0
- .github/workflows/pylint.yml +23 -0
- CONTRIBUTION.md +21 -29
- Core_Advanced_RAG_components.ipynb +392 -0
- README.md +33 -32
- core-langchain-rag.py +267 -0
- docs/advanced_rag_architecture.drawio +115 -0
- docs/data_flow_diagram.drawio.png +0 -0
- docs/template.md +16 -0
- docs/workflow-advanced-rag.drawio +83 -0
- rag-system-anatomy/build_vector_store.py +46 -0
- rag-system-anatomy/create_embedding.py +48 -0
- rag-system-anatomy/get_db_retriever.py +29 -0
- rag-system-anatomy/load_data_from_urls.py +32 -0
- rag-system-anatomy/load_example_embeddings.py +37 -0
- requirements.txt +12 -0
- vectorstore/placeholder.txt +1 -0
.devcontainer/Dockerfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG VARIANT="3.10-bookworm"
|
| 2 |
+
FROM mcr.microsoft.com/devcontainers/python:1-${VARIANT}
|
| 3 |
+
#FROM langchain/langchain
|
| 4 |
+
|
| 5 |
+
# [Optional] Uncomment if you want to install an additional version of node using nvm
|
| 6 |
+
# ARG EXTRA_NODE_VERSION=10
|
| 7 |
+
# RUN su node -c "source /usr/local/share/nvm/nvm.sh && nvm install ${EXTRA_NODE_VERSION}"
|
| 8 |
+
|
| 9 |
+
# [Optional] Uncomment if you want to install more global node modules
|
| 10 |
+
# RUN su node -c "npm install -g <your-package-list-here>"
|
| 11 |
+
|
| 12 |
+
#COPY library-scripts/github-debian.sh /tmp/library-scripts/
|
| 13 |
+
RUN apt-get update && apt-get update
|
| 14 |
+
RUN pip install --upgrade pip
|
| 15 |
+
#RUN pip install -r requirements.txt
|
.devcontainer/devcontainer.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
| 2 |
+
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
| 3 |
+
{
|
| 4 |
+
"name": "Python 3.10",
|
| 5 |
+
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
| 6 |
+
//"image": "mcr.microsoft.com/devcontainers/python:1-3.10-bookworm"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
// build config for the docker image instead:
|
| 10 |
+
"build": { "dockerfile": "Dockerfile" },
|
| 11 |
+
|
| 12 |
+
// Features to add to the dev container. More info: https://containers.dev/features.
|
| 13 |
+
// "features": {},
|
| 14 |
+
|
| 15 |
+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
| 16 |
+
// "forwardPorts": [],
|
| 17 |
+
|
| 18 |
+
// Use 'postCreateCommand' to run commands after the container is created.
|
| 19 |
+
// "postCreateCommand": "pip3 install --user -r requirements.txt",
|
| 20 |
+
|
| 21 |
+
// Configure tool-specific properties.
|
| 22 |
+
"customizations": {
|
| 23 |
+
// Configure properties specific to VS Code.
|
| 24 |
+
"vscode": {
|
| 25 |
+
//Add the IDs of extensions you want installed when the container is created.
|
| 26 |
+
"extensions": ["ms-azuretools.vscode-docker", "ms-python.python", "qwtel.sqlite-viewer"]
|
| 27 |
+
}
|
| 28 |
+
}//,
|
| 29 |
+
|
| 30 |
+
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
| 31 |
+
// "remoteUser": "root"
|
| 32 |
+
}
|
.github/dependabot.yml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# To get started with Dependabot version updates, you'll need to specify which
|
| 2 |
+
# package ecosystems to update and where the package manifests are located.
|
| 3 |
+
# Please see the documentation for more information:
|
| 4 |
+
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
| 5 |
+
# https://containers.dev/guide/dependabot
|
| 6 |
+
|
| 7 |
+
version: 2
|
| 8 |
+
updates:
|
| 9 |
+
- package-ecosystem: "devcontainers"
|
| 10 |
+
directory: "/"
|
| 11 |
+
schedule:
|
| 12 |
+
interval: weekly
|
.github/workflows/pylint.yml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Pylint
|
| 2 |
+
|
| 3 |
+
on: [push]
|
| 4 |
+
|
| 5 |
+
jobs:
|
| 6 |
+
build:
|
| 7 |
+
runs-on: ubuntu-latest
|
| 8 |
+
strategy:
|
| 9 |
+
matrix:
|
| 10 |
+
python-version: ["3.8", "3.9", "3.10"]
|
| 11 |
+
steps:
|
| 12 |
+
- uses: actions/checkout@v3
|
| 13 |
+
- name: Set up Python ${{ matrix.python-version }}
|
| 14 |
+
uses: actions/setup-python@v3
|
| 15 |
+
with:
|
| 16 |
+
python-version: ${{ matrix.python-version }}
|
| 17 |
+
- name: Install dependencies
|
| 18 |
+
run: |
|
| 19 |
+
python -m pip install --upgrade pip
|
| 20 |
+
pip install pylint
|
| 21 |
+
- name: Analysing the code with pylint
|
| 22 |
+
run: |
|
| 23 |
+
pylint $(git ls-files '*.py')
|
CONTRIBUTION.md
CHANGED
|
@@ -1,36 +1,28 @@
|
|
| 1 |
-
#
|
| 2 |
|
| 3 |
-
|
|
|
|
| 4 |
|
| 5 |
-
##
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
When reporting issues, please use the provided issue template. Your report should include:
|
| 20 |
-
|
| 21 |
-
- A clear, descriptive title
|
| 22 |
-
- A detailed description of the issue
|
| 23 |
-
- Steps to reproduce the issue
|
| 24 |
-
- Logs, if applicable
|
| 25 |
-
- Screenshots, if applicable
|
| 26 |
-
|
| 27 |
-
This information is crucial in diagnosing and fixing the issue you're experiencing.
|
| 28 |
-
|
| 29 |
-
### Suggestions
|
| 30 |
-
|
| 31 |
-
We're always looking for new ideas to improve our project. If you have a suggestion, please:
|
| 32 |
-
|
| 33 |
-
- Clearly describe your suggestion, including the purpose and intended outcome.
|
| 34 |
-
- Explain why you believe this change would be beneficial to the project.
|
| 35 |
-
|
| 36 |
-
We appreciate your contributions and look forward to collaborating with you!
|
|
|
|
| 1 |
+
# Pull Request Template
|
| 2 |
|
| 3 |
+
## Description
|
| 4 |
+
Please include a brief description of the changes introduced by this PR.
|
| 5 |
|
| 6 |
+
## Related Issue(s)
|
| 7 |
+
- If this PR addresses a particular issue, please reference it here using GitHub's linking syntax, e.g., "Fixes #123".
|
| 8 |
+
- If there's no related issue, briefly explain the motivation behind these changes.
|
| 9 |
|
| 10 |
+
## Changes Made
|
| 11 |
+
Please provide a list of the changes made in this PR.
|
| 12 |
|
| 13 |
+
## Screenshots (if applicable)
|
| 14 |
+
If the changes include UI updates or visual changes, please attach relevant screenshots here.
|
| 15 |
|
| 16 |
+
## Checklist
|
| 17 |
+
- [ ] I have tested my changes locally and ensured that they work as expected.
|
| 18 |
+
- [ ] I have updated the documentation (if applicable).
|
| 19 |
+
- [ ] My code follows the project's coding conventions and style guidelines.
|
| 20 |
+
- [ ] I have added appropriate test cases (if applicable).
|
| 21 |
+
- [ ] I have reviewed my own code to ensure its quality.
|
| 22 |
|
| 23 |
+
## Additional Notes
|
| 24 |
+
Add any additional notes or context about this PR here.
|
| 25 |
|
| 26 |
+
## Reviewer(s)
|
| 27 |
+
- @reviewer1
|
| 28 |
+
- @reviewer2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Core_Advanced_RAG_components.ipynb
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"gpuType": "T4",
|
| 8 |
+
"authorship_tag": "ABX9TyNTRxOWLfv3tkZHe66pK63p",
|
| 9 |
+
"include_colab_link": true
|
| 10 |
+
},
|
| 11 |
+
"kernelspec": {
|
| 12 |
+
"name": "python3",
|
| 13 |
+
"display_name": "Python 3"
|
| 14 |
+
},
|
| 15 |
+
"language_info": {
|
| 16 |
+
"name": "python"
|
| 17 |
+
},
|
| 18 |
+
"accelerator": "GPU"
|
| 19 |
+
},
|
| 20 |
+
"cells": [
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "markdown",
|
| 23 |
+
"metadata": {
|
| 24 |
+
"id": "view-in-github",
|
| 25 |
+
"colab_type": "text"
|
| 26 |
+
},
|
| 27 |
+
"source": [
|
| 28 |
+
"<a href=\"https://colab.research.google.com/github/almutareb/advanced-rag-system-anatomy/blob/main/Core_Advanced_RAG_components.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"source": [
|
| 34 |
+
"Install requirements"
|
| 35 |
+
],
|
| 36 |
+
"metadata": {
|
| 37 |
+
"id": "Hz8JZq6Ob8rt"
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"source": [
|
| 43 |
+
"import sys\n",
|
| 44 |
+
"import os\n",
|
| 45 |
+
"!pip install -qU langchain langchain-community --no-warn-script-location > /dev/null\n",
|
| 46 |
+
"!pip install -qU beautifulsoup4 --no-warn-script-location > /dev/null\n",
|
| 47 |
+
"!pip install -qU faiss-cpu --no-warn-script-location > /dev/null\n",
|
| 48 |
+
"# use the gpu optimized version of FAISS for better performance\n",
|
| 49 |
+
"#!pip install -qU faiss-gpu --no-warn-script-location > /dev/null\n",
|
| 50 |
+
"!pip install -qU chromadb --no-warn-script-location > /dev/null\n",
|
| 51 |
+
"!pip install -qU validators --no-warn-script-location > /dev/null\n",
|
| 52 |
+
"!pip install -qU sentence_transformers typing-extensions==4.8.0 unstructured --no-warn-script-location > /dev/null\n",
|
| 53 |
+
"!pip install -qU gradio==3.48.0 --no-warn-script-location > /dev/null"
|
| 54 |
+
],
|
| 55 |
+
"metadata": {
|
| 56 |
+
"id": "SXTdFuTvboyV"
|
| 57 |
+
},
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"outputs": []
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "markdown",
|
| 63 |
+
"source": [
|
| 64 |
+
"Download Documents"
|
| 65 |
+
],
|
| 66 |
+
"metadata": {
|
| 67 |
+
"id": "pETUBgFAk4Fx"
|
| 68 |
+
}
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"cell_type": "code",
|
| 72 |
+
"source": [
|
| 73 |
+
"from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n",
|
| 74 |
+
"from bs4 import BeautifulSoup as Soup\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"# List of URLs to scrape\n",
|
| 77 |
+
"urls = [\"https://langchain-doc.readthedocs.io/en/latest\"\n",
|
| 78 |
+
" \"https://python.langchain.com/docs/get_started\"]\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"# Initialize an empty list to store the documents\n",
|
| 81 |
+
"docs = []\n",
|
| 82 |
+
"# Looping through each URL in the list - this could take some time!\n",
|
| 83 |
+
"for url in urls:\n",
|
| 84 |
+
" # max_depth set to 2 for demo purpose, should be increased for real scenario results, e.g. at least 5\n",
|
| 85 |
+
" loader = RecursiveUrlLoader(url=url, max_depth=4, extractor=lambda x: Soup(x, \"html.parser\").text)\n",
|
| 86 |
+
" docs.extend(loader.load())\n",
|
| 87 |
+
"print(f'Downloaded a total of {len(docs)} documents')"
|
| 88 |
+
],
|
| 89 |
+
"metadata": {
|
| 90 |
+
"id": "eVav9lGgk3X3"
|
| 91 |
+
},
|
| 92 |
+
"execution_count": null,
|
| 93 |
+
"outputs": []
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "markdown",
|
| 97 |
+
"source": [
|
| 98 |
+
"Chunking documents"
|
| 99 |
+
],
|
| 100 |
+
"metadata": {
|
| 101 |
+
"id": "0iurKj94w1jm"
|
| 102 |
+
}
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"cell_type": "code",
|
| 106 |
+
"source": [
|
| 107 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
| 108 |
+
"import time\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"text_splitter = RecursiveCharacterTextSplitter(\n",
|
| 111 |
+
" chunk_size = 500, # The size of each text chunk\n",
|
| 112 |
+
" chunk_overlap = 50, # Overlap between chunks to ensure continuity\n",
|
| 113 |
+
")\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"# Stage one: read all the docs, split them into chunks.\n",
|
| 116 |
+
"st = time.time() # Start time for performance measurement\n",
|
| 117 |
+
"print('Loading documents ...')\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"# Split each document into chunks using the configured text splitter\n",
|
| 120 |
+
"chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"et = time.time() - st # Calculate time taken for splitting\n",
|
| 123 |
+
"print(f'created {len(chunks)} chunks in {et} seconds.')"
|
| 124 |
+
],
|
| 125 |
+
"metadata": {
|
| 126 |
+
"id": "zSZJQeA_w2B3"
|
| 127 |
+
},
|
| 128 |
+
"execution_count": null,
|
| 129 |
+
"outputs": []
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "markdown",
|
| 133 |
+
"source": [
|
| 134 |
+
"Build VectorStore: Vectorization"
|
| 135 |
+
],
|
| 136 |
+
"metadata": {
|
| 137 |
+
"id": "oQGtHuTxkmFq"
|
| 138 |
+
}
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"source": [
|
| 143 |
+
"from langchain.vectorstores import FAISS\n",
|
| 144 |
+
"from langchain.vectorstores.utils import filter_complex_metadata\n",
|
| 145 |
+
"from langchain.embeddings import HuggingFaceEmbeddings\n",
|
| 146 |
+
"\n",
|
| 147 |
+
"# Path for saving the FAISS index\n",
|
| 148 |
+
"FAISS_INDEX_PATH = \"./vectorstore/lc-faiss-multi-mpnet-500\"\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"\n",
|
| 151 |
+
"#Stage two: embed the docs.\n",
|
| 152 |
+
"# use multi-qa-mpnet-base-dot-v1 sentence transformer to convert pieces of text in vectors to store them in the vector store\n",
|
| 153 |
+
"model_name = \"sentence-transformers/multi-qa-mpnet-base-dot-v1\"\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"# use the GPU for faster processing\n",
|
| 156 |
+
"#model_kwargs = {\"device\": \"cuda\"}\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"# Initialize HuggingFace embeddings with the specified model\n",
|
| 159 |
+
"embeddings = HuggingFaceEmbeddings(\n",
|
| 160 |
+
" model_name=model_name,\n",
|
| 161 |
+
"# model_kwargs=model_kwargs # uncomment when using a GPU, like T4 - requires extended RAM!\n",
|
| 162 |
+
" )\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"print(f'Loading chunks into vector store ...')\n",
|
| 165 |
+
"st = time.time() # Start time for performance measurement\n",
|
| 166 |
+
"\n",
|
| 167 |
+
"# Create a FAISS vector store from the document chunks and save it locally\n",
|
| 168 |
+
"db = FAISS.from_documents(filter_complex_metadata(chunks), embeddings)\n",
|
| 169 |
+
"# persist vectorstore\n",
|
| 170 |
+
"db.save_local(FAISS_INDEX_PATH)\n",
|
| 171 |
+
"\n",
|
| 172 |
+
"et = time.time() - st\n",
|
| 173 |
+
"print(f'Time taken: {et} seconds.')"
|
| 174 |
+
],
|
| 175 |
+
"metadata": {
|
| 176 |
+
"id": "qu6sDsq6c9fg"
|
| 177 |
+
},
|
| 178 |
+
"execution_count": null,
|
| 179 |
+
"outputs": []
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"cell_type": "markdown",
|
| 183 |
+
"source": [
|
| 184 |
+
"Load LLM"
|
| 185 |
+
],
|
| 186 |
+
"metadata": {
|
| 187 |
+
"id": "updDdzwj0RdJ"
|
| 188 |
+
}
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"cell_type": "code",
|
| 192 |
+
"source": [
|
| 193 |
+
"from dotenv import load_dotenv\n",
|
| 194 |
+
"# HF libraries\n",
|
| 195 |
+
"from langchain.llms import HuggingFaceHub\n",
|
| 196 |
+
"\n",
|
| 197 |
+
"# Load environment variables from a .env file\n",
|
| 198 |
+
"CONFIG = load_dotenv(\".env\")\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"# Retrieve the Hugging Face API token from environment variables\n",
|
| 201 |
+
"HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')\n",
|
| 202 |
+
"\n",
|
| 203 |
+
"# load HF Token\n",
|
| 204 |
+
"HUGGINGFACEHUB_API_TOKEN=os.getenv('HUGGINGFACEHUB_API_TOKEN')\n",
|
| 205 |
+
"\n",
|
| 206 |
+
"# Load the model from the Hugging Face Hub\n",
|
| 207 |
+
"model_id = HuggingFaceHub(repo_id=\"mistralai/Mistral-7B-Instruct-v0.1\", model_kwargs={\n",
|
| 208 |
+
" \"temperature\":0.1,\n",
|
| 209 |
+
" \"max_new_tokens\":1024,\n",
|
| 210 |
+
" \"repetition_penalty\":1.2,\n",
|
| 211 |
+
" \"return_full_text\":False\n",
|
| 212 |
+
" })\n"
|
| 213 |
+
],
|
| 214 |
+
"metadata": {
|
| 215 |
+
"id": "GlnNrNdbg2E6"
|
| 216 |
+
},
|
| 217 |
+
"execution_count": null,
|
| 218 |
+
"outputs": []
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
"cell_type": "markdown",
|
| 222 |
+
"source": [
|
| 223 |
+
"Retriever"
|
| 224 |
+
],
|
| 225 |
+
"metadata": {
|
| 226 |
+
"id": "2m3BIm090jtr"
|
| 227 |
+
}
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"cell_type": "code",
|
| 231 |
+
"source": [
|
| 232 |
+
"from langchain.embeddings import HuggingFaceHubEmbeddings\n",
|
| 233 |
+
"# vectorestore\n",
|
| 234 |
+
"from langchain.vectorstores import FAISS\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"# Load and Initialize the vector store as a retriever for the RAG pipeline\n",
|
| 237 |
+
"db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)\n",
|
| 238 |
+
"\n",
|
| 239 |
+
"retriever = db.as_retriever()"
|
| 240 |
+
],
|
| 241 |
+
"metadata": {
|
| 242 |
+
"id": "jzqPsuds0kSs"
|
| 243 |
+
},
|
| 244 |
+
"execution_count": null,
|
| 245 |
+
"outputs": []
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "markdown",
|
| 249 |
+
"source": [
|
| 250 |
+
"Template and Chat logic"
|
| 251 |
+
],
|
| 252 |
+
"metadata": {
|
| 253 |
+
"id": "Bld8lOEv0Uq-"
|
| 254 |
+
}
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"source": [
|
| 259 |
+
"# retrieval chain\n",
|
| 260 |
+
"from langchain.chains import RetrievalQA\n",
|
| 261 |
+
"# prompt template\n",
|
| 262 |
+
"from langchain.prompts import PromptTemplate\n",
|
| 263 |
+
"from langchain.memory import ConversationBufferMemory\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"\n",
|
| 266 |
+
"global qa\n",
|
| 267 |
+
"template = \"\"\"\n",
|
| 268 |
+
"You are the friendly documentation buddy Arti, who helps novice programmers in using LangChain with simple explanations and examples.\\\n",
|
| 269 |
+
" Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question :\n",
|
| 270 |
+
"------\n",
|
| 271 |
+
"<ctx>\n",
|
| 272 |
+
"{context}\n",
|
| 273 |
+
"</ctx>\n",
|
| 274 |
+
"------\n",
|
| 275 |
+
"<hs>\n",
|
| 276 |
+
"{history}\n",
|
| 277 |
+
"</hs>\n",
|
| 278 |
+
"------\n",
|
| 279 |
+
"{question}\n",
|
| 280 |
+
"Answer:\n",
|
| 281 |
+
"\"\"\"\n",
|
| 282 |
+
"# Create a PromptTemplate object with specified input variables and the defined template\n",
|
| 283 |
+
"prompt = PromptTemplate.from_template(\n",
|
| 284 |
+
" template=template,\n",
|
| 285 |
+
")\n",
|
| 286 |
+
"prompt.format(context=\"context\", history=\"history\", question=\"question\")\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"# Create a memory buffer to manage conversation history\n",
|
| 289 |
+
"memory = ConversationBufferMemory(memory_key=\"history\", input_key=\"question\")\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"# Initialize the RetrievalQA object with the specified model,\n",
|
| 292 |
+
"# retriever, and additional configurations\n",
|
| 293 |
+
"qa = RetrievalQA.from_chain_type(llm=model_id, chain_type=\"stuff\", retriever=retriever, verbose=True, return_source_documents=True, chain_type_kwargs={\n",
|
| 294 |
+
" \"verbose\": True,\n",
|
| 295 |
+
" \"memory\": memory,\n",
|
| 296 |
+
" \"prompt\": prompt\n",
|
| 297 |
+
"}\n",
|
| 298 |
+
" )"
|
| 299 |
+
],
|
| 300 |
+
"metadata": {
|
| 301 |
+
"id": "K255Ldxq0Xg6"
|
| 302 |
+
},
|
| 303 |
+
"execution_count": null,
|
| 304 |
+
"outputs": []
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"cell_type": "markdown",
|
| 308 |
+
"source": [
|
| 309 |
+
"UI - Gradio"
|
| 310 |
+
],
|
| 311 |
+
"metadata": {
|
| 312 |
+
"id": "pA5d0LL2kObx"
|
| 313 |
+
}
|
| 314 |
+
},
|
| 315 |
+
{
|
| 316 |
+
"cell_type": "code",
|
| 317 |
+
"source": [
|
| 318 |
+
"history=[]\n",
|
| 319 |
+
"query=\"draft a function to calculate a mxn matrix\"\n",
|
| 320 |
+
"question=query\n",
|
| 321 |
+
"response=qa({\"query\": query, \"history\": history, \"question\": question})\n",
|
| 322 |
+
"print(*response)"
|
| 323 |
+
],
|
| 324 |
+
"metadata": {
|
| 325 |
+
"id": "bKeoyhXPrQ2C"
|
| 326 |
+
},
|
| 327 |
+
"execution_count": null,
|
| 328 |
+
"outputs": []
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"cell_type": "code",
|
| 332 |
+
"source": [
|
| 333 |
+
"print(response['result'])"
|
| 334 |
+
],
|
| 335 |
+
"metadata": {
|
| 336 |
+
"id": "78wRMjjn0cl3"
|
| 337 |
+
},
|
| 338 |
+
"execution_count": null,
|
| 339 |
+
"outputs": []
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"cell_type": "code",
|
| 343 |
+
"source": [
|
| 344 |
+
"import gradio as gr\n",
|
| 345 |
+
"\n",
|
| 346 |
+
"# Function to add a new input to the chat history\n",
|
| 347 |
+
"def add_text(history, text):\n",
|
| 348 |
+
" # Append the new text to the history with a placeholder for the response\n",
|
| 349 |
+
" history = history + [(text, None)]\n",
|
| 350 |
+
" return history, \"\"\n",
|
| 351 |
+
"\n",
|
| 352 |
+
"# Function representing the bot's response mechanism\n",
|
| 353 |
+
"def bot(history):\n",
|
| 354 |
+
" response = infer(history[-1][0], history)\n",
|
| 355 |
+
" history[-1][1] = response['result']\n",
|
| 356 |
+
" return history\n",
|
| 357 |
+
"\n",
|
| 358 |
+
"# Function to infer the response using the RAG model\n",
|
| 359 |
+
"def infer(question, history):\n",
|
| 360 |
+
" query = question\n",
|
| 361 |
+
" result = qa({\"query\": query, \"history\": history, \"question\": question})\n",
|
| 362 |
+
" return result\n",
|
| 363 |
+
"\n",
|
| 364 |
+
"# Building the Gradio interface\n",
|
| 365 |
+
"with gr.Blocks() as demo:\n",
|
| 366 |
+
" with gr.Column(elem_id=\"col-container\"):\n",
|
| 367 |
+
" chatbot = gr.Chatbot([], elem_id=\"chatbot\")\n",
|
| 368 |
+
" clear = gr.Button(\"Clear\")\n",
|
| 369 |
+
"\n",
|
| 370 |
+
" # Create a row for the question input\n",
|
| 371 |
+
" with gr.Row():\n",
|
| 372 |
+
" question = gr.Textbox(label=\"Question\", placeholder=\"Type your question and hit Enter \")\n",
|
| 373 |
+
"\n",
|
| 374 |
+
" # Define the action when the question is submitted\n",
|
| 375 |
+
" question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(\n",
|
| 376 |
+
" bot, chatbot, chatbot\n",
|
| 377 |
+
" )\n",
|
| 378 |
+
"\n",
|
| 379 |
+
" # Define the action for the clear button\n",
|
| 380 |
+
" clear.click(lambda: None, None, chatbot, queue=False)\n",
|
| 381 |
+
"\n",
|
| 382 |
+
"# Launch the Gradio demo interface\n",
|
| 383 |
+
"demo.launch(share=False)"
|
| 384 |
+
],
|
| 385 |
+
"metadata": {
|
| 386 |
+
"id": "OHVkFa6MkCir"
|
| 387 |
+
},
|
| 388 |
+
"execution_count": null,
|
| 389 |
+
"outputs": []
|
| 390 |
+
}
|
| 391 |
+
]
|
| 392 |
+
}
|
README.md
CHANGED
|
@@ -1,32 +1,33 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
-
|
| 9 |
-
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
-
|
| 25 |
-
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
| 1 |
+
# Anatomy of Advanced Enterprise Rag Systems
|
| 2 |
+
|
| 3 |
+
This repository accompanies the blog series "The Anatomy of Advanced Enterprise Rag Systems" and provides a hands-on learning experience for building sophisticated Rag systems. Dive deep into each component, from setup and evaluation to security and multi-agent interactions.
|
| 4 |
+
|
| 5 |
+
Explore these key topics:
|
| 6 |
+
|
| 7 |
+
- Test Setup and Evaluation Metrics: Learn how to assess the performance and effectiveness of your Rag system.
|
| 8 |
+
- Data Preparation and Management: Discover techniques for organizing and optimizing your knowledge base.
|
| 9 |
+
- User Input Processing: Understand how to handle diverse user queries and extract relevant information.
|
| 10 |
+
- Retrieval System: Unleash the power of retrieving relevant passages from your knowledge base.
|
| 11 |
+
- Information Processing and Generation: Craft accurate and informative responses using state-of-the-art techniques.
|
| 12 |
+
- Feedback and Continuous Improvement: Enhance your Rag system over time using user feedback and data analysis.
|
| 13 |
+
- Multi-agents and Agent-services: Explore advanced architectures for distributed and collaborative Rag systems.
|
| 14 |
+
- Monitoring and Security: Ensure the robustness and trustworthiness of your Rag system with proper monitoring and security practices.
|
| 15 |
+
|
| 16 |
+
What you'll find here:
|
| 17 |
+
|
| 18 |
+
- Code examples: Implementations of key concepts from each topic, ready to use and adapt.
|
| 19 |
+
- Data samples: Pre-prepared data sets for experimentation and testing.
|
| 20 |
+
- Additional resources: Links to relevant articles, libraries, and tools to deepen your understanding.
|
| 21 |
+
|
| 22 |
+
Getting started:
|
| 23 |
+
|
| 24 |
+
- Clone this repository: git clone https://github.com/<username>/advanced-enterprise-rag-systems.git
|
| 25 |
+
- Follow the instructions in each topic directory.
|
| 26 |
+
|
| 27 |
+
Contributing:
|
| 28 |
+
|
| 29 |
+
We welcome your contributions! Share your expertise, improve existing code examples, or add new ones. Submit a pull request to share your valuable additions.
|
| 30 |
+
|
| 31 |
+
License:
|
| 32 |
+
|
| 33 |
+
This project is licensed under the MIT License: LICENSE.
|
core-langchain-rag.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importing necessary libraries
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
# # Importing RecursiveUrlLoader for web scraping and BeautifulSoup for HTML parsing
|
| 7 |
+
# from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
|
| 8 |
+
# from bs4 import BeautifulSoup as Soup
|
| 9 |
+
# import mimetypes
|
| 10 |
+
|
| 11 |
+
# # List of URLs to scrape
|
| 12 |
+
# urls = ["https://langchain-doc.readthedocs.io/en/latest"]
|
| 13 |
+
|
| 14 |
+
# # Initialize an empty list to store the documents
|
| 15 |
+
# docs = []
|
| 16 |
+
|
| 17 |
+
# # Looping through each URL in the list - this could take some time!
|
| 18 |
+
# stf = time.time() # Start time for performance measurement
|
| 19 |
+
# for url in urls:
|
| 20 |
+
# try:
|
| 21 |
+
# st = time.time() # Start time for performance measurement
|
| 22 |
+
# # Create a RecursiveUrlLoader instance with a specified URL and depth
|
| 23 |
+
# # The extractor function uses BeautifulSoup to parse the HTML content and extract text
|
| 24 |
+
# loader = RecursiveUrlLoader(url=url, max_depth=5, extractor=lambda x: Soup(x, "html.parser").text)
|
| 25 |
+
|
| 26 |
+
# # Load the documents from the URL and extend the docs list
|
| 27 |
+
# docs.extend(loader.load())
|
| 28 |
+
|
| 29 |
+
# et = time.time() - st # Calculate time taken for splitting
|
| 30 |
+
# print(f'Time taken for downloading documents from {url}: {et} seconds.')
|
| 31 |
+
# except Exception as e:
|
| 32 |
+
# # Print an error message if there is an issue with loading or parsing the URL
|
| 33 |
+
# print(f"Failed to load or parse the URL {url}. Error: {e}", file=sys.stderr)
|
| 34 |
+
# etf = time.time() - stf # Calculate time taken for splitting
|
| 35 |
+
# print(f'Total time taken for downloading {len(docs)} documents: {etf} seconds.')
|
| 36 |
+
|
| 37 |
+
# # Import necessary modules for text splitting and vectorization
|
| 38 |
+
# from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 39 |
+
# import time
|
| 40 |
+
# from langchain_community.vectorstores import FAISS
|
| 41 |
+
# from langchain.vectorstores.utils import filter_complex_metadata
|
| 42 |
+
# from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 43 |
+
|
| 44 |
+
# # Configure the text splitter
|
| 45 |
+
# text_splitter = RecursiveCharacterTextSplitter(
|
| 46 |
+
# separators=["\n\n", "\n", "(?<=\. )", " ", ""], # Define the separators for splitting text
|
| 47 |
+
# chunk_size=500, # The size of each text chunk
|
| 48 |
+
# chunk_overlap=50, # Overlap between chunks to ensure continuity
|
| 49 |
+
# length_function=len, # Function to determine the length of each chunk
|
| 50 |
+
# )
|
| 51 |
+
|
| 52 |
+
# try:
|
| 53 |
+
# # Stage one: Splitting the documents into chunks for vectorization
|
| 54 |
+
# st = time.time() # Start time for performance measurement
|
| 55 |
+
# print('Loading documents and creating chunks ...')
|
| 56 |
+
# # Split each document into chunks using the configured text splitter
|
| 57 |
+
# chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
|
| 58 |
+
# et = time.time() - st # Calculate time taken for splitting
|
| 59 |
+
# print(f"created "+chunks+" chunks")
|
| 60 |
+
# print(f'Time taken for document chunking: {et} seconds.')
|
| 61 |
+
# except Exception as e:
|
| 62 |
+
# print(f"Error during document chunking: {e}", file=sys.stderr)
|
| 63 |
+
|
| 64 |
+
# # Path for saving the FAISS index
|
| 65 |
+
# FAISS_INDEX_PATH = "./vectorstore/lc-faiss-multi-mpnet-500"
|
| 66 |
+
|
| 67 |
+
# try:
|
| 68 |
+
# # Stage two: Vectorization of the document chunks
|
| 69 |
+
# model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1" # Model used for embedding
|
| 70 |
+
|
| 71 |
+
# # Initialize HuggingFace embeddings with the specified model
|
| 72 |
+
# embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
| 73 |
+
|
| 74 |
+
# print(f'Loading chunks into vector store ...')
|
| 75 |
+
# st = time.time() # Start time for performance measurement
|
| 76 |
+
# # Create a FAISS vector store from the document chunks and save it locally
|
| 77 |
+
# db = FAISS.from_documents(filter_complex_metadata(chunks), embeddings)
|
| 78 |
+
# db.save_local(FAISS_INDEX_PATH)
|
| 79 |
+
# et = time.time() - st # Calculate time taken for vectorization
|
| 80 |
+
# print(f'Time taken for vectorization and saving: {et} seconds.')
|
| 81 |
+
# except Exception as e:
|
| 82 |
+
# print(f"Error during vectorization or FAISS index saving: {e}", file=sys.stderr)
|
| 83 |
+
|
| 84 |
+
# alternatively download a preparaed vectorized index from S3 and load the index into vectorstore
|
| 85 |
+
# Import necessary libraries for AWS S3 interaction, file handling, and FAISS vector stores
|
| 86 |
+
import boto3
|
| 87 |
+
from botocore import UNSIGNED
|
| 88 |
+
from botocore.client import Config
|
| 89 |
+
import zipfile
|
| 90 |
+
from langchain_community.vectorstores import FAISS
|
| 91 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 92 |
+
from dotenv import load_dotenv
|
| 93 |
+
|
| 94 |
+
# Load environment variables from a .env file
|
| 95 |
+
config = load_dotenv(".env")
|
| 96 |
+
|
| 97 |
+
# Retrieve the Hugging Face API token from environment variables
|
| 98 |
+
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
| 99 |
+
S3_LOCATION = os.getenv("S3_LOCATION")
|
| 100 |
+
|
| 101 |
+
try:
|
| 102 |
+
# Initialize an S3 client with unsigned configuration for public access
|
| 103 |
+
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
|
| 104 |
+
|
| 105 |
+
# Define the FAISS index path and the destination for the downloaded file
|
| 106 |
+
FAISS_INDEX_PATH = './vectorstore/lc-faiss-multi-mpnet-500-markdown'
|
| 107 |
+
VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
|
| 108 |
+
|
| 109 |
+
# Download the pre-prepared vectorized index from the S3 bucket
|
| 110 |
+
print("Downloading the pre-prepared vectorized index from S3...")
|
| 111 |
+
s3.download_file(S3_LOCATION, 'vectorstores/lc-faiss-multi-mpnet-500-markdown.zip', VS_DESTINATION)
|
| 112 |
+
|
| 113 |
+
# Extract the downloaded zip file
|
| 114 |
+
with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
|
| 115 |
+
zip_ref.extractall('./vectorstore/')
|
| 116 |
+
print("Download and extraction completed.")
|
| 117 |
+
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
|
| 120 |
+
|
| 121 |
+
# Define the model name for embeddings
|
| 122 |
+
model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
|
| 123 |
+
|
| 124 |
+
try:
|
| 125 |
+
# Initialize HuggingFace embeddings with the specified model
|
| 126 |
+
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
| 127 |
+
|
| 128 |
+
# Load the local FAISS index with the specified embeddings
|
| 129 |
+
db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
|
| 130 |
+
print("FAISS index loaded successfully.")
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error during FAISS index loading: {e}", file=sys.stderr)
|
| 133 |
+
|
| 134 |
+
# Import necessary modules for environment variable management and HuggingFace integration
|
| 135 |
+
from langchain_huggingface import HuggingFaceEndpoint
|
| 136 |
+
|
| 137 |
+
# Initialize the vector store as a retriever for the RAG pipeline
|
| 138 |
+
retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 3, 'lambda_mult': 0.25})
|
| 139 |
+
|
| 140 |
+
try:
|
| 141 |
+
# Load the model from the Hugging Face Hub
|
| 142 |
+
model_id = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 143 |
+
temperature=0.1, # Controls randomness in response generation (lower value means less random)
|
| 144 |
+
max_new_tokens=1024, # Maximum number of new tokens to generate in responses
|
| 145 |
+
repetition_penalty=1.2, # Penalty for repeating the same words (higher value increases penalty)
|
| 146 |
+
return_full_text=False # If False, only the newly generated text is returned; if True, the input is included as well
|
| 147 |
+
)
|
| 148 |
+
print("Model loaded successfully from Hugging Face Hub.")
|
| 149 |
+
except Exception as e:
|
| 150 |
+
print(f"Error loading model from Hugging Face Hub: {e}", file=sys.stderr)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# Importing necessary modules for retrieval-based question answering and prompt handling
|
| 155 |
+
from langchain.chains import RetrievalQA
|
| 156 |
+
from langchain.prompts import PromptTemplate
|
| 157 |
+
from langchain.memory import ConversationBufferMemory
|
| 158 |
+
|
| 159 |
+
# Declare a global variable 'qa' for the retrieval-based question answering system
|
| 160 |
+
global qa
|
| 161 |
+
|
| 162 |
+
# Define a prompt template for guiding the model's responses
|
| 163 |
+
template = """
|
| 164 |
+
You are the friendly documentation buddy Arti, if you don't know the answer say 'I don't know' and don't make things up.\
|
| 165 |
+
Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question :
|
| 166 |
+
------
|
| 167 |
+
<ctx>
|
| 168 |
+
{context}
|
| 169 |
+
</ctx>
|
| 170 |
+
------
|
| 171 |
+
<hs>
|
| 172 |
+
{history}
|
| 173 |
+
</hs>
|
| 174 |
+
------
|
| 175 |
+
{question}
|
| 176 |
+
Answer:
|
| 177 |
+
"""
|
| 178 |
+
|
| 179 |
+
# Create a PromptTemplate object with specified input variables and the defined template
|
| 180 |
+
prompt = PromptTemplate.from_template(
|
| 181 |
+
#input_variables=["history", "context", "question"], # Variables to be included in the prompt
|
| 182 |
+
template=template, # The prompt template as defined above
|
| 183 |
+
)
|
| 184 |
+
prompt.format(context="context", history="history", question="question")
|
| 185 |
+
# Create a memory buffer to manage conversation history
|
| 186 |
+
memory = ConversationBufferMemory(
|
| 187 |
+
memory_key="history", # Key for storing the conversation history
|
| 188 |
+
input_key="question" # Key for the input question
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
# Initialize the RetrievalQA object with the specified model, retriever, and additional configurations
|
| 192 |
+
qa = RetrievalQA.from_chain_type(
|
| 193 |
+
llm=model_id, # Language model loaded from Hugging Face Hub
|
| 194 |
+
retriever=retriever, # The vector store retriever initialized earlier
|
| 195 |
+
return_source_documents=True, # Option to return source documents along with responses
|
| 196 |
+
chain_type_kwargs={
|
| 197 |
+
"verbose": True, # Enables verbose output for debugging and analysis
|
| 198 |
+
"memory": memory, # Memory buffer for managing conversation history
|
| 199 |
+
"prompt": prompt # Prompt template for guiding the model's responses
|
| 200 |
+
}
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
# Import Gradio for UI, along with other necessary libraries
|
| 204 |
+
import gradio as gr
|
| 205 |
+
import random
|
| 206 |
+
import time
|
| 207 |
+
|
| 208 |
+
# Function to add a new input to the chat history
|
| 209 |
+
def add_text(history, text):
    """Append a user message to the chat history.

    The message is stored as a ``(text, None)`` pair; the ``None`` slot is a
    placeholder later filled with the bot's reply. Returns the updated
    history and an empty string so the Gradio textbox is cleared.
    """
    updated = history + [(text, None)]
    return updated, ""
|
| 213 |
+
|
| 214 |
+
# Function representing the bot's response mechanism
|
| 215 |
+
def bot(history):
    """Produce the assistant reply for the most recent user message.

    Queries the RAG chain via ``infer``, appends the list of retrieved
    source paths to the answer text, and writes the combined string into
    the reply slot of the last history entry.

    Fix: documents whose metadata lacks a "source" key make
    ``metadata.get("source")`` return ``None``, and ``'\n'.join`` would
    then raise ``TypeError: sequence item: expected str``. Falsy entries
    are now filtered out before joining.
    """
    # The latest user input is the first element of the newest (text, reply) pair.
    response = infer(history[-1][0], history)
    sources = [doc.metadata.get("source") for doc in response['source_documents']]
    # Drop None/empty sources so join cannot fail on non-string items.
    src_list = '\n'.join(src for src in sources if src)
    print_this = response['result'] + "\n\n\n Sources: \n\n\n" + src_list
    # Fill the placeholder reply slot created by add_text.
    history[-1][1] = print_this
    return history
|
| 227 |
+
|
| 228 |
+
# Function to infer the response using the RAG model
|
| 229 |
+
def infer(question, history):
    """Run the RetrievalQA chain for a single question.

    The chat history is passed along with the question; the chain's memory
    buffer consumes the "question" and "history" keys declared in the
    prompt template. Returns the chain's result dict (answer plus source
    documents).
    """
    payload = {"query": question, "history": history, "question": question}
    return qa(payload)
|
| 233 |
+
|
| 234 |
+
# CSS styling for the Gradio interface
|
| 235 |
+
css = """
|
| 236 |
+
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
| 237 |
+
"""
|
| 238 |
+
|
| 239 |
+
# HTML content for the Gradio interface title
|
| 240 |
+
title = """
|
| 241 |
+
<div style="text-align: center;max-width: 700px;">
|
| 242 |
+
<h1>Chat with your Documentation</h1>
|
| 243 |
+
<p style="text-align: center;">Chat with LangChain Documentation, <br />
|
| 244 |
+
You can ask questions about the LangChain docu ;)</p>
|
| 245 |
+
</div>
|
| 246 |
+
"""
|
| 247 |
+
|
| 248 |
+
# Building the Gradio interface
|
| 249 |
+
with gr.Blocks(css=css) as demo:
|
| 250 |
+
with gr.Column(elem_id="col-container"):
|
| 251 |
+
gr.HTML(title) # Add the HTML title to the interface
|
| 252 |
+
chatbot = gr.Chatbot([], elem_id="chatbot") # Initialize the chatbot component
|
| 253 |
+
clear = gr.Button("Clear") # Add a button to clear the chat
|
| 254 |
+
|
| 255 |
+
# Create a row for the question input
|
| 256 |
+
with gr.Row():
|
| 257 |
+
question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
|
| 258 |
+
|
| 259 |
+
# Define the action when the question is submitted
|
| 260 |
+
question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
|
| 261 |
+
bot, chatbot, chatbot
|
| 262 |
+
)
|
| 263 |
+
# Define the action for the clear button
|
| 264 |
+
clear.click(lambda: None, None, chatbot, queue=False)
|
| 265 |
+
|
| 266 |
+
# Launch the Gradio demo interface
|
| 267 |
+
demo.launch(share=False)
|
docs/advanced_rag_architecture.drawio
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<mxfile host="app.diagrams.net" modified="2024-02-02T10:57:09.662Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0" etag="jBjrxQrE8FMZUdqYmkDs" version="22.1.21" type="github">
|
| 2 |
+
<diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">
|
| 3 |
+
<mxGraphModel dx="1434" dy="774" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
|
| 4 |
+
<root>
|
| 5 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-0" />
|
| 6 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-1" parent="WIyWlLk6GJQsqaUBKTNV-0" />
|
| 7 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-59" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-4" target="7HGE-dyt3ShhVV6eNgTS-22">
|
| 8 |
+
<mxGeometry relative="1" as="geometry" />
|
| 9 |
+
</mxCell>
|
| 10 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-4" value="Retrieval System" style="swimlane;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;startSize=23;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 11 |
+
<mxGeometry x="280" y="360" width="290" height="280" as="geometry" />
|
| 12 |
+
</mxCell>
|
| 13 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-7" value="<div>Indicies</div>" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.database;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
|
| 14 |
+
<mxGeometry x="44" y="40" width="60" height="60" as="geometry" />
|
| 15 |
+
</mxCell>
|
| 16 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-32" value="Re-ranking" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
|
| 17 |
+
<mxGeometry x="14" y="130" width="120" height="40" as="geometry" />
|
| 18 |
+
</mxCell>
|
| 19 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-34" value="Hypothetical Questions and HyDE" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
|
| 20 |
+
<mxGeometry x="160" y="130" width="120" height="40" as="geometry" />
|
| 21 |
+
</mxCell>
|
| 22 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-37" value="Fine-tuning Embeddings" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
|
| 23 |
+
<mxGeometry x="14" y="195" width="120" height="40" as="geometry" />
|
| 24 |
+
</mxCell>
|
| 25 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-33" value="Hyperparamter Tuning" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-4">
|
| 26 |
+
<mxGeometry x="160" y="50" width="120" height="40" as="geometry" />
|
| 27 |
+
</mxCell>
|
| 28 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-56" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=1;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-14" target="7HGE-dyt3ShhVV6eNgTS-19">
|
| 29 |
+
<mxGeometry relative="1" as="geometry" />
|
| 30 |
+
</mxCell>
|
| 31 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-14" value="<div>Data Preparation and Management</div>" style="swimlane;whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 32 |
+
<mxGeometry x="40" y="40" width="360" height="240" as="geometry">
|
| 33 |
+
<mxRectangle x="410" y="40" width="240" height="30" as="alternateBounds" />
|
| 34 |
+
</mxGeometry>
|
| 35 |
+
</mxCell>
|
| 36 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-23" value="Chunking &amp; Vectorization" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
|
| 37 |
+
<mxGeometry x="14" y="50" width="120" height="40" as="geometry" />
|
| 38 |
+
</mxCell>
|
| 39 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-24" value="Metadata and Summaries" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
|
| 40 |
+
<mxGeometry x="161" y="50" width="120" height="40" as="geometry" />
|
| 41 |
+
</mxCell>
|
| 42 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-25" value="User Profile Management" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
|
| 43 |
+
<mxGeometry x="14" y="190" width="120" height="40" as="geometry" />
|
| 44 |
+
</mxCell>
|
| 45 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-26" value="Data Cleaning" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
|
| 46 |
+
<mxGeometry x="14" y="120" width="120" height="40" as="geometry" />
|
| 47 |
+
</mxCell>
|
| 48 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-31" value="Complex Formats Handling" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-14">
|
| 49 |
+
<mxGeometry x="161" y="120" width="120" height="40" as="geometry" />
|
| 50 |
+
</mxCell>
|
| 51 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-57" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-19" target="7HGE-dyt3ShhVV6eNgTS-4">
|
| 52 |
+
<mxGeometry relative="1" as="geometry" />
|
| 53 |
+
</mxCell>
|
| 54 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-19" value="<div>User Input processing</div>" style="swimlane;horizontal=0;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 55 |
+
<mxGeometry x="40" y="360" width="200" height="280" as="geometry" />
|
| 56 |
+
</mxCell>
|
| 57 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-3" value="User Authentication" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" parent="7HGE-dyt3ShhVV6eNgTS-19" vertex="1">
|
| 58 |
+
<mxGeometry x="40" y="20" width="120" height="40" as="geometry" />
|
| 59 |
+
</mxCell>
|
| 60 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-7" value="Query Rewriter" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" parent="7HGE-dyt3ShhVV6eNgTS-19" vertex="1">
|
| 61 |
+
<mxGeometry x="40" y="80" width="120" height="40" as="geometry" />
|
| 62 |
+
</mxCell>
|
| 63 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-8" value="Input Guardrail" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-19">
|
| 64 |
+
<mxGeometry x="40" y="140" width="120" height="40" as="geometry" />
|
| 65 |
+
</mxCell>
|
| 66 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-5" value="chat history&nbsp; " style="strokeWidth=2;html=1;shape=mxgraph.flowchart.multi-document;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-19">
|
| 67 |
+
<mxGeometry x="56" y="210" width="88" height="60" as="geometry" />
|
| 68 |
+
</mxCell>
|
| 69 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-60" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0;exitDx=0;exitDy=0;entryX=1;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-22" target="7HGE-dyt3ShhVV6eNgTS-44">
|
| 70 |
+
<mxGeometry relative="1" as="geometry">
|
| 71 |
+
<mxPoint x="790" y="280" as="targetPoint" />
|
| 72 |
+
</mxGeometry>
|
| 73 |
+
</mxCell>
|
| 74 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-22" value="Information Processing and Generation" style="swimlane;horizontal=0;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 75 |
+
<mxGeometry x="600" y="360" width="200" height="280" as="geometry" />
|
| 76 |
+
</mxCell>
|
| 77 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-9" value="Response Generation" style="rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
|
| 78 |
+
<mxGeometry x="40" y="20" width="120" height="40" as="geometry" />
|
| 79 |
+
</mxCell>
|
| 80 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-28" value="Output Guardrails and Moderation" style="rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
|
| 81 |
+
<mxGeometry x="40" y="80" width="120" height="40" as="geometry" />
|
| 82 |
+
</mxCell>
|
| 83 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-29" value="Caching" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.multi-document;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
|
| 84 |
+
<mxGeometry x="56" y="140" width="88" height="60" as="geometry" />
|
| 85 |
+
</mxCell>
|
| 86 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-30" value="Personalization and Customization" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-22">
|
| 87 |
+
<mxGeometry x="40" y="215" width="120" height="40" as="geometry" />
|
| 88 |
+
</mxCell>
|
| 89 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-61" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=1;entryY=0;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="7HGE-dyt3ShhVV6eNgTS-44" target="7HGE-dyt3ShhVV6eNgTS-14">
|
| 90 |
+
<mxGeometry relative="1" as="geometry">
|
| 91 |
+
<Array as="points">
|
| 92 |
+
<mxPoint x="440" y="40" />
|
| 93 |
+
<mxPoint x="440" y="40" />
|
| 94 |
+
</Array>
|
| 95 |
+
</mxGeometry>
|
| 96 |
+
</mxCell>
|
| 97 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-44" value="Feedback and Continuous Improvement" style="swimlane;whiteSpace=wrap;html=1;startSize=23;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 98 |
+
<mxGeometry x="480" y="40" width="320" height="240" as="geometry" />
|
| 99 |
+
</mxCell>
|
| 100 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-46" value="Data Refinement" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
|
| 101 |
+
<mxGeometry x="170" y="50" width="120" height="40" as="geometry" />
|
| 102 |
+
</mxCell>
|
| 103 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-47" value="System Monitoring" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
|
| 104 |
+
<mxGeometry x="170" y="120" width="120" height="40" as="geometry" />
|
| 105 |
+
</mxCell>
|
| 106 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-48" value="Generation Evaluation" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
|
| 107 |
+
<mxGeometry x="20" y="120" width="120" height="40" as="geometry" />
|
| 108 |
+
</mxCell>
|
| 109 |
+
<mxCell id="7HGE-dyt3ShhVV6eNgTS-49" value="User Feedback" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="7HGE-dyt3ShhVV6eNgTS-44">
|
| 110 |
+
<mxGeometry x="20" y="50" width="120" height="40" as="geometry" />
|
| 111 |
+
</mxCell>
|
| 112 |
+
</root>
|
| 113 |
+
</mxGraphModel>
|
| 114 |
+
</diagram>
|
| 115 |
+
</mxfile>
|
docs/data_flow_diagram.drawio.png
ADDED
|
docs/template.md
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Template
|
| 2 |
+
|
| 3 |
+
1. **Architecture of advanced RAG**
|
| 4 |
+
2. **Test setup and Evaluation metrics**
|
| 5 |
+
3. **Data preparation (vectorization & chunking)**
|
| 6 |
+
4. **Search indexing**
|
| 7 |
+
5. **Query transformation**
|
| 8 |
+
6. **Chat logic and query routing**
|
| 9 |
+
7. **Multi agents and agent-services**
|
| 10 |
+
8. **Monitoring responses and adding security**
|
| 11 |
+
|
| 12 |
+
## Additional Resources
|
| 13 |
+
|
| 14 |
+
[Enterprise RAG](https://www.rungalileo.io/blog/mastering-rag-how-to-architect-an-enterprise-rag-system)
|
| 15 |
+
|
| 16 |
+
[Advanced RAG](https://medium.com/towards-artificial-intelligence/advanced-rag-techniques-an-illustrated-overview-04d193d8fec6)
|
docs/workflow-advanced-rag.drawio
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<mxfile host="app.diagrams.net" modified="2024-02-02T11:21:08.029Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0" etag="EvpGiXuqtWkE4FAqL8_g" version="22.1.21" type="github">
|
| 2 |
+
<diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">
|
| 3 |
+
<mxGraphModel dx="1434" dy="774" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
|
| 4 |
+
<root>
|
| 5 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-0" />
|
| 6 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-1" parent="WIyWlLk6GJQsqaUBKTNV-0" />
|
| 7 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-1" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-3" target="eFb6EC-VP60E3mpf6WAh-0">
|
| 8 |
+
<mxGeometry relative="1" as="geometry" />
|
| 9 |
+
</mxCell>
|
| 10 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-3" value="User Authentication and Input" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#d5e8d4;strokeColor=#82b366;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
|
| 11 |
+
<mxGeometry x="24" y="80" width="120" height="40" as="geometry" />
|
| 12 |
+
</mxCell>
|
| 13 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-12" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-7" target="WIyWlLk6GJQsqaUBKTNV-11">
|
| 14 |
+
<mxGeometry relative="1" as="geometry" />
|
| 15 |
+
</mxCell>
|
| 16 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-7" value="Query Processing" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#d5e8d4;strokeColor=#82b366;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
|
| 17 |
+
<mxGeometry x="180" y="160" width="120" height="40" as="geometry" />
|
| 18 |
+
</mxCell>
|
| 19 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-11" target="eFb6EC-VP60E3mpf6WAh-3">
|
| 20 |
+
<mxGeometry relative="1" as="geometry" />
|
| 21 |
+
</mxCell>
|
| 22 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-11" value="Data Preparation and Management" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#ffe6cc;strokeColor=#d79b00;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
|
| 23 |
+
<mxGeometry x="330" y="160" width="120" height="40" as="geometry" />
|
| 24 |
+
</mxCell>
|
| 25 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-0" target="WIyWlLk6GJQsqaUBKTNV-7">
|
| 26 |
+
<mxGeometry relative="1" as="geometry" />
|
| 27 |
+
</mxCell>
|
| 28 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-0" value="Input Guardrails" style="whiteSpace=wrap;html=1;rounded=1;glass=0;strokeWidth=1;shadow=0;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 29 |
+
<mxGeometry x="24" y="160" width="120" height="40" as="geometry" />
|
| 30 |
+
</mxCell>
|
| 31 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-3" target="eFb6EC-VP60E3mpf6WAh-5">
|
| 32 |
+
<mxGeometry relative="1" as="geometry" />
|
| 33 |
+
</mxCell>
|
| 34 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-3" value="Retrieval System" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 35 |
+
<mxGeometry x="480" y="160" width="120" height="40" as="geometry" />
|
| 36 |
+
</mxCell>
|
| 37 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-5" target="eFb6EC-VP60E3mpf6WAh-9">
|
| 38 |
+
<mxGeometry relative="1" as="geometry" />
|
| 39 |
+
</mxCell>
|
| 40 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-18" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=0.75;entryY=1;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-5" target="eFb6EC-VP60E3mpf6WAh-6">
|
| 41 |
+
<mxGeometry relative="1" as="geometry">
|
| 42 |
+
<Array as="points">
|
| 43 |
+
<mxPoint x="630" y="130" />
|
| 44 |
+
<mxPoint x="450" y="130" />
|
| 45 |
+
</Array>
|
| 46 |
+
</mxGeometry>
|
| 47 |
+
</mxCell>
|
| 48 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-5" value="Information Processing and Augmentation" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 49 |
+
<mxGeometry x="630" y="160" width="120" height="40" as="geometry" />
|
| 50 |
+
</mxCell>
|
| 51 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-6" value="Observability and Feedback" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 52 |
+
<mxGeometry x="360" y="40" width="120" height="40" as="geometry" />
|
| 53 |
+
</mxCell>
|
| 54 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.98;exitY=0.02;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.575;entryY=1;entryDx=0;entryDy=0;dashed=1;entryPerimeter=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-8" target="eFb6EC-VP60E3mpf6WAh-3">
|
| 55 |
+
<mxGeometry relative="1" as="geometry" />
|
| 56 |
+
</mxCell>
|
| 57 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-8" value="Caching" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.multi-document;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 58 |
+
<mxGeometry x="376" y="240" width="88" height="60" as="geometry" />
|
| 59 |
+
</mxCell>
|
| 60 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-9" target="eFb6EC-VP60E3mpf6WAh-6">
|
| 61 |
+
<mxGeometry relative="1" as="geometry" />
|
| 62 |
+
</mxCell>
|
| 63 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-9" value="Output and Response" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
| 64 |
+
<mxGeometry x="630" y="80" width="120" height="40" as="geometry" />
|
| 65 |
+
</mxCell>
|
| 66 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0;exitDx=0;exitDy=0;entryX=0.325;entryY=0.975;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-0" target="eFb6EC-VP60E3mpf6WAh-6">
|
| 67 |
+
<mxGeometry relative="1" as="geometry">
|
| 68 |
+
<Array as="points">
|
| 69 |
+
<mxPoint x="144" y="130" />
|
| 70 |
+
<mxPoint x="399" y="130" />
|
| 71 |
+
</Array>
|
| 72 |
+
</mxGeometry>
|
| 73 |
+
</mxCell>
|
| 74 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.75;exitY=1;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-7" target="eFb6EC-VP60E3mpf6WAh-8">
|
| 75 |
+
<mxGeometry relative="1" as="geometry" />
|
| 76 |
+
</mxCell>
|
| 77 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-20" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-9" target="eFb6EC-VP60E3mpf6WAh-8">
|
| 78 |
+
<mxGeometry relative="1" as="geometry" />
|
| 79 |
+
</mxCell>
|
| 80 |
+
</root>
|
| 81 |
+
</mxGraphModel>
|
| 82 |
+
</diagram>
|
| 83 |
+
</mxfile>
|
rag-system-anatomy/build_vector_store.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# vectorization functions
|
| 2 |
+
from langchain.vectorstores import FAISS
|
| 3 |
+
from langchain.document_loaders import ReadTheDocsLoader
|
| 4 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 6 |
+
from create_embedding import create_embeddings
|
| 7 |
+
import time
|
| 8 |
+
|
| 9 |
+
def build_vector_store(
    docs: list,
    db_path: str,
    embedding_model: str,
    new_db: bool = False,
    chunk_size: int = 500,
    chunk_overlap: int = 50,
):
    """Chunk and embed `docs`, then persist them in a local FAISS index.

    ## Args:
        docs (list): `Document` objects to vectorize.
        db_path (str): directory of the FAISS index; a default path is used when None.
        embedding_model (str): HuggingFace model name used to embed the chunks.
        new_db (bool, optional): when True a fresh index is built from the chunks;
            otherwise the chunks are appended to the existing index at `db_path`.
        chunk_size (int, optional): chunk size used when splitting. Defaults to 500.
        chunk_overlap (int, optional): overlap between consecutive chunks. Defaults to 50.

    ## Returns:
        str: a short status message naming the index location.
    """
    if db_path is None:
        FAISS_INDEX_PATH = "./vectorstore/py-faiss-multi-mpnet-500"
    else:
        FAISS_INDEX_PATH = db_path

    # Pass by keyword: create_embeddings is declared as
    # (docs, chunk_size, chunk_overlap, embedding_model) -- positional
    # passing would silently hand the model name to chunk_size.
    embeddings, chunks = create_embeddings(
        docs,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        embedding_model=embedding_model,
    )

    # load chunks into vector store
    print('Loading chunks into faiss vector store ...')
    st = time.time()
    if new_db:
        db_faiss = FAISS.from_documents(chunks, embeddings)
    else:
        # add_documents is an instance method: load the existing index
        # first, then extend it with the new chunks.
        db_faiss = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
        db_faiss.add_documents(chunks)
    db_faiss.save_local(FAISS_INDEX_PATH)
    et = time.time() - st
    print(f'Time taken: {et} seconds.')

    #print(f'Loading chunks into chroma vector store ...')
    #st = time.time()
    #persist_directory='./vectorstore/py-chroma-multi-mpnet-500'
    #db_chroma = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
    #et = time.time() - st
    #print(f'Time taken: {et} seconds.')
    result = f"built vectore store at {FAISS_INDEX_PATH}"
    return result
|
rag-system-anatomy/create_embedding.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# embeddings functions
|
| 2 |
+
from langchain.vectorstores import FAISS
|
| 3 |
+
from langchain.document_loaders import ReadTheDocsLoader
|
| 4 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 6 |
+
import time
|
| 7 |
+
from langchain_core.documents import Document
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def create_embeddings(
    docs: list[Document],
    chunk_size: int,
    chunk_overlap: int,
    embedding_model: str = "sentence-transformers/multi-qa-mpnet-base-dot-v1",
):
    """Split a sequence of `Document` objects into chunks and build the
    embedding function that will vectorize them.

    ## Args:
        docs (list[Document]): documents to split into chunks.
        chunk_size (int): size of each chunk produced by the splitter.
        chunk_overlap (int): amount of text overlapped between consecutive chunks.
        embedding_model (str): HuggingFace model name that will embed the documents.

    ## Returns:
        tuple: `(embeddings, chunks)` -- the HuggingFace embedding function and
        the list of chunked `Document` objects.
    """
    # NOTE(review): "(?<=\. )" is a regex lookbehind; RecursiveCharacterTextSplitter
    # treats separators literally unless regex separators are enabled in the
    # installed langchain version -- confirm against the pinned release.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", "(?<=\. )", " ", ""],
        chunk_size = chunk_size,
        chunk_overlap = chunk_overlap,
        length_function = len,
    )

    # Stage one: read all the docs, split them into chunks.
    st = time.time()
    print('Loading documents ...')

    # create_documents re-attaches each source document's metadata to its chunks.
    chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
    et = time.time() - st
    print(f'Time taken: {et} seconds.')

    # Stage two: build the embedding function for the chunks.
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    print(f"created a total of {len(chunks)} chunks")

    return embeddings, chunks
|
rag-system-anatomy/get_db_retriever.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# retriever and qa_chain function
|
| 2 |
+
|
| 3 |
+
# HF libraries
|
| 4 |
+
from langchain.llms import HuggingFaceHub
|
| 5 |
+
from langchain.embeddings import HuggingFaceHubEmbeddings
|
| 6 |
+
# vectorestore
|
| 7 |
+
from langchain.vectorstores import FAISS
|
| 8 |
+
# retrieval chain
|
| 9 |
+
from langchain.chains import RetrievalQA
|
| 10 |
+
# prompt template
|
| 11 |
+
from langchain.prompts import PromptTemplate
|
| 12 |
+
from langchain.memory import ConversationBufferMemory
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_db_retriever(vector_db: str = None):
    """Open a persisted FAISS index and expose it as a LangChain retriever.

    ## Args:
        vector_db (str, optional): path to a saved FAISS index directory.
            When falsy, a default local index path is used.

    ## Returns:
        A retriever backed by the loaded FAISS vector store.
    """
    # Embeddings must match the model the index was built with.
    model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
    embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)

    index_path = vector_db if vector_db else './vectorstore/py-faiss-multi-mpnet-500'
    vector_store = FAISS.load_local(index_path, embeddings)

    return vector_store.as_retriever()
|
rag-system-anatomy/load_data_from_urls.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# documents loader function
|
| 2 |
+
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
|
| 3 |
+
from bs4 import BeautifulSoup as Soup
|
| 4 |
+
from validators import url as url_validator
|
| 5 |
+
from langchain_core.documents import Document
|
| 6 |
+
|
| 7 |
+
def load_docs_from_urls(
    urls: list = ["https://docs.python.org/3/"],
    max_depth: int = 5,
) -> list[Document]:
    """
    Recursively crawl each URL and collect the pages as documents.

    ## Args:
        urls (list, optional): URLs to load documents from. Defaults to ["https://docs.python.org/3/"].
        max_depth (int, optional): maximum recursion depth per URL. Defaults to 5.

    ## Returns:
        list: documents loaded from the given URLs.

    ## Raises:
        ValueError: if any URL in the provided list is invalid.
    """
    documents: list[Document] = []
    for target in urls:
        # Fail fast on malformed input before issuing any requests.
        if not url_validator(target):
            raise ValueError(f"Invalid URL: {target}")
        crawler = RecursiveUrlLoader(
            url=target,
            max_depth=max_depth,
            # Strip markup: keep only the visible text of each page.
            extractor=lambda page: Soup(page, "html.parser").text,
        )
        documents.extend(crawler.load())
    print(f"loaded {len(documents)} pages")
    return documents
|
rag-system-anatomy/load_example_embeddings.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# preprocessed vectorstore retrieval
|
| 2 |
+
import boto3
|
| 3 |
+
from botocore import UNSIGNED
|
| 4 |
+
from botocore.client import Config
|
| 5 |
+
import zipfile
|
| 6 |
+
from langchain.vectorstores import FAISS
|
| 7 |
+
from langchain.vectorstores import Chroma
|
| 8 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 9 |
+
|
| 10 |
+
# access .env file
|
| 11 |
+
|
| 12 |
+
# Anonymous (unsigned) S3 client: the demo bucket is read without credentials.
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
#model_kwargs = {"device": "cuda"}

# Embeddings must match the model the prebuilt indexes were created with.
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    # model_kwargs=model_kwargs
)


def _download_and_extract(object_key: str, destination: str) -> None:
    """Download a zipped vector store from the demo bucket and unpack it
    into ./vectorstore/ (shared by the FAISS and Chroma stores below)."""
    s3.download_file('rad-rag-demos', object_key, destination)
    with zipfile.ZipFile(destination, 'r') as zip_ref:
        zip_ref.extractall('./vectorstore/')


## FAISS
FAISS_INDEX_PATH = './vectorstore/lc-faiss-multi-mpnet-500-markdown'
VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
_download_and_extract('vectorstores/lc-faiss-multi-mpnet-500-markdown.zip', VS_DESTINATION)
faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)

## Chroma DB
chroma_directory = "./vectorstore/lc-chroma-multi-mpnet-500-markdown"
VS_DESTINATION = chroma_directory + ".zip"
_download_and_extract('vectorstores/lc-chroma-multi-mpnet-500-markdown.zip', VS_DESTINATION)
chromadb = Chroma(persist_directory=chroma_directory, embedding_function=embeddings)
chromadb.get()
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain
|
| 2 |
+
langchain-community
|
| 3 |
+
langchain_huggingface
|
| 4 |
+
beautifulsoup4
|
| 5 |
+
faiss-cpu
|
| 6 |
+
chromadb
|
| 7 |
+
validators
|
| 8 |
+
sentence_transformers
|
| 9 |
+
typing-extensions
|
| 10 |
+
unstructured
|
| 11 |
+
gradio
|
| 12 |
+
boto3
|
vectorstore/placeholder.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
This file keeps the folder from being deleted for now
|