{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d797e3ac-c8df-47fc-b349-5d2466d12ca6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting gradio\n", " Downloading gradio-5.12.0-py3-none-any.whl.metadata (16 kB)\n", "Collecting PyMuPDF\n", " Downloading pymupdf-1.25.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.24.1)\n", "Collecting faiss-gpu\n", " Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)\n", "Collecting transformers\n", " Downloading transformers-4.48.0-py3-none-any.whl.metadata (44 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m849.1 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hCollecting sentence-transformers\n", " Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)\n", "Collecting protobuf\n", " Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)\n", "Collecting tokenizers\n", " Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", "Collecting accelerate\n", " Downloading accelerate-1.2.1-py3-none-any.whl.metadata (19 kB)\n", "Collecting sentencepiece\n", " Downloading sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)\n", "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", " Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n", "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.0.0)\n", "Collecting fastapi<1.0,>=0.115.2 (from gradio)\n", " Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)\n", "Collecting ffmpy (from gradio)\n", " Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)\n", "Collecting gradio-client==1.5.4 (from gradio)\n", " Downloading gradio_client-1.5.4-py3-none-any.whl.metadata (7.1 kB)\n", "Collecting httpx>=0.24.1 (from gradio)\n", " Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)\n", "Collecting huggingface-hub>=0.25.1 (from gradio)\n", " Downloading huggingface_hub-0.27.1-py3-none-any.whl.metadata (13 kB)\n", "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.2)\n", "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.2)\n", "Collecting orjson~=3.0 (from gradio)\n", " Downloading orjson-3.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (41 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.8/41.8 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (23.2)\n", "Collecting pandas<3.0,>=1.0 (from gradio)\n", " Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.9/89.9 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (9.3.0)\n", "Collecting pydantic>=2.0 (from gradio)\n", " Downloading pydantic-2.10.5-py3-none-any.whl.metadata (30 kB)\n", "Collecting pydub (from gradio)\n", " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", "Collecting python-multipart>=0.0.18 (from gradio)\n", " Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)\n", "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n", "Collecting ruff>=0.2.2 (from gradio)\n", " Downloading ruff-0.9.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)\n", " Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)\n", "Collecting semantic-version~=2.0 (from gradio)\n", " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", "Collecting starlette<1.0,>=0.40.0 (from gradio)\n", " Downloading starlette-0.45.2-py3-none-any.whl.metadata (6.3 kB)\n", "Collecting tomlkit<0.14.0,>=0.12.0 (from gradio)\n", " Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)\n", "Collecting typer<1.0,>=0.12 (from gradio)\n", " Downloading typer-0.15.1-py3-none-any.whl.metadata (15 kB)\n", "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.4.0)\n", "Collecting uvicorn>=0.14.0 (from gradio)\n", " Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==1.5.4->gradio) (2023.4.0)\n", "Collecting websockets<15.0,>=10.0 (from gradio-client==1.5.4->gradio)\n", " Downloading websockets-14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.9.0)\n", "Collecting regex!=2019.12.17 (from transformers)\n", " Downloading regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", "Collecting safetensors>=0.4.1 (from transformers)\n", " Downloading safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", "Collecting tqdm>=4.27 (from transformers)\n", " Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.7/57.7 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (2.1.0+cu118)\n", "Collecting scikit-learn (from sentence-transformers)\n", " Downloading scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", "Collecting scipy (from sentence-transformers)\n", " Downloading scipy-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.6)\n", "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (3.4)\n", "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.0)\n", "Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (1.1.3)\n", "Collecting starlette<1.0,>=0.40.0 (from gradio)\n", " Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n", "Collecting typing-extensions~=4.0 (from gradio)\n", " Downloading typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (2022.12.7)\n", "Collecting httpcore==1.* (from httpx>=0.24.1->gradio)\n", " Downloading httpcore-1.0.7-py3-none-any.whl.metadata (21 kB)\n", "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.24.1->gradio)\n", " Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n", "Collecting fsspec (from gradio-client==1.5.4->gradio)\n", " Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2.8.2)\n", "Collecting pytz>=2020.1 (from pandas<3.0,>=1.0->gradio)\n", " Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n", "Collecting tzdata>=2022.7 (from pandas<3.0,>=1.0->gradio)\n", " Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", "Collecting annotated-types>=0.6.0 (from pydantic>=2.0->gradio)\n", " Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", "Collecting pydantic-core==2.27.2 (from pydantic>=2.0->gradio)\n", " Downloading pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.0)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.1.0)\n", "Collecting click>=8.0.0 (from typer<1.0,>=0.12->gradio)\n", " Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)\n", "Collecting shellingham>=1.3.0 (from typer<1.0,>=0.12->gradio)\n", " Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)\n", "Collecting rich>=10.11.0 (from typer<1.0,>=0.12->gradio)\n", " Downloading rich-13.9.4-py3-none-any.whl.metadata (18 kB)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.1.1)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.13)\n", "Collecting joblib>=1.2.0 (from scikit-learn->sentence-transformers)\n", " Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)\n", "Collecting threadpoolctl>=3.1.0 (from scikit-learn->sentence-transformers)\n", " Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.16.0)\n", "Collecting markdown-it-py>=2.2.0 (from rich>=10.11.0->typer<1.0,>=0.12->gradio)\n", " Downloading markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.16.1)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.3.0)\n", "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio)\n", " Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", "Downloading gradio-5.12.0-py3-none-any.whl (57.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n", "\u001b[?25hDownloading gradio_client-1.5.4-py3-none-any.whl (321 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m321.4/321.4 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading pymupdf-1.25.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.5/85.5 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading transformers-4.48.0-py3-none-any.whl (9.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.7/9.7 MB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading sentence_transformers-3.3.1-py3-none-any.whl (268 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl (319 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m319.7/319.7 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n", "\u001b[?25hDownloading accelerate-1.2.1-py3-none-any.whl (336 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m336.4/336.4 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", "Downloading fastapi-0.115.6-py3-none-any.whl (94 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.8/94.8 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpx-0.28.1-py3-none-any.whl (73 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.5/73.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpcore-1.0.7-py3-none-any.whl (78 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.6/78.6 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading huggingface_hub-0.27.1-py3-none-any.whl (450 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m450.7/450.7 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading orjson-3.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.4/130.4 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading pydantic-2.10.5-py3-none-any.whl (431 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m431.4/431.4 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading python_multipart-0.0.20-py3-none-any.whl (24 kB)\n", "Downloading regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (781 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.7/781.7 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading ruff-0.9.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading safehttpx-0.1.6-py3-none-any.whl (8.7 kB)\n", "Downloading safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (461 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m462.0/462.0 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Downloading starlette-0.41.3-py3-none-any.whl (73 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.2/73.2 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading tomlkit-0.13.2-py3-none-any.whl (37 kB)\n", "Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.5/78.5 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading typer-0.15.1-py3-none-any.whl (44 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.9/44.9 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n", "Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading ffmpy-0.5.0-py3-none-any.whl (6.0 kB)\n", "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", "Downloading scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading scipy-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (40.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.6/40.6 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", "Downloading click-8.1.8-py3-none-any.whl (98 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.2/98.2 kB\u001b[0m \u001b[31m833.6 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.12.0-py3-none-any.whl (183 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m895.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m942.8 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading joblib-1.4.2-py3-none-any.whl (301 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.8/301.8 kB\u001b[0m \u001b[31m975.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m508.0/508.0 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n", "\u001b[?25hDownloading rich-13.9.4-py3-none-any.whl (242 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m242.4/242.4 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n", "Downloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)\n", "Downloading tzdata-2024.2-py2.py3-none-any.whl (346 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m346.6/346.6 kB\u001b[0m \u001b[31m974.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading websockets-14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (168 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.2/168.2 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n", "\u001b[?25hDownloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.5/87.5 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", "Installing collected packages: sentencepiece, pytz, pydub, faiss-gpu, websockets, tzdata, typing-extensions, tqdm, tomlkit, threadpoolctl, shellingham, semantic-version, scipy, safetensors, ruff, regex, python-multipart, PyMuPDF, protobuf, orjson, mdurl, joblib, h11, fsspec, ffmpy, click, annotated-types, aiofiles, uvicorn, starlette, scikit-learn, pydantic-core, pandas, markdown-it-py, huggingface-hub, httpcore, tokenizers, rich, pydantic, httpx, accelerate, typer, transformers, safehttpx, gradio-client, fastapi, sentence-transformers, gradio\n", " Attempting uninstall: typing-extensions\n", " Found existing installation: typing_extensions 4.4.0\n", " Uninstalling typing_extensions-4.4.0:\n", " Successfully uninstalled typing_extensions-4.4.0\n", " Attempting uninstall: fsspec\n", " Found existing installation: fsspec 2023.4.0\n", " Uninstalling fsspec-2023.4.0:\n", " Successfully uninstalled fsspec-2023.4.0\n", "Successfully installed PyMuPDF-1.25.1 accelerate-1.2.1 aiofiles-23.2.1 annotated-types-0.7.0 click-8.1.8 faiss-gpu-1.7.2 fastapi-0.115.6 ffmpy-0.5.0 fsspec-2024.12.0 gradio-5.12.0 gradio-client-1.5.4 h11-0.14.0 httpcore-1.0.7 httpx-0.28.1 huggingface-hub-0.27.1 joblib-1.4.2 markdown-it-py-3.0.0 mdurl-0.1.2 orjson-3.10.14 pandas-2.2.3 protobuf-5.29.3 pydantic-2.10.5 pydantic-core-2.27.2 pydub-0.25.1 python-multipart-0.0.20 pytz-2024.2 regex-2024.11.6 rich-13.9.4 ruff-0.9.1 safehttpx-0.1.6 safetensors-0.5.2 scikit-learn-1.6.1 scipy-1.15.1 semantic-version-2.10.0 sentence-transformers-3.3.1 sentencepiece-0.2.0 shellingham-1.5.4 starlette-0.41.3 threadpoolctl-3.5.0 tokenizers-0.21.0 tomlkit-0.13.2 tqdm-4.67.1 transformers-4.48.0 typer-0.15.1 typing-extensions-4.12.2 tzdata-2024.2 uvicorn-0.34.0 websockets-14.1\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "!pip install gradio PyMuPDF numpy faiss-gpu transformers sentence-transformers protobuf tokenizers accelerate sentencepiece" ] }, { "cell_type": "code", "execution_count": 3, "id": "e952b997-7a55-4eae-8b44-3be6a82b1bcf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting bitsandbytes\n", " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from bitsandbytes) (2.1.0+cu118)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from bitsandbytes) (1.24.1)\n", "Requirement already satisfied: typing_extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from bitsandbytes) (4.12.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (3.9.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (3.0)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (2024.12.0)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (2.1.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->bitsandbytes) (2.1.2)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->bitsandbytes) (1.3.0)\n", "Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m41.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hInstalling collected packages: bitsandbytes\n", "Successfully installed bitsandbytes-0.45.0\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "!pip install bitsandbytes" ] }, { "cell_type": "code", "execution_count": 1, "id": "ba87e888-bde5-4fc9-816b-a4cc4361d1bb", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message\n", "You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9931b1deaeca47309c3ddffc40647368", "version_major": 2, "version_minor": 0 }, "text/plain": [ "pytorch_model.bin.index.json: 0%| | 0.00/33.4k [00:00" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "04ea1db8d4094ed6ba1e997c5631dcad", "version_major": 2, "version_minor": 0 }, "text/plain": [ "modules.json: 0%| | 0.00/349 [00:00