Nicolai Berk committed · Commit 818c1d2 · Parent(s): eb084fc

Init simple wiki RAG

Files changed:
- README.md +42 -11
- app.py +59 -63
- requirements.txt +6 -1
README.md
CHANGED
@@ -1,14 +1,45 @@
- ---
- license: mit
- short_description: A simple RAG demo
- ---
+ # Mini RAG Demo – Retrieval-Augmented Generation on Wikipedia
+
+ This is a lightweight Retrieval-Augmented Generation (RAG) app built with Gradio. It combines semantic search over a mini-Wikipedia corpus (`rag-datasets/rag-mini-wikipedia`) with reranking and language generation to answer natural-language questions using real documents.
+
+ ---
+
+ ## What It Does
+
+ - Embeds a query using a SentenceTransformer (`all-MiniLM-L6-v2`)
+ - Retrieves the top-5 most semantically similar Wikipedia passages using FAISS
+ - Reranks them using a CrossEncoder model (`cross-encoder/ms-marco-MiniLM-L-6-v2`)
+ - Generates an answer using a Hugging Face language model
+
+ ---
+
+ ## Tech Stack
+
+ - **Gradio** – Web interface
+ - **FAISS** – Fast dense vector retrieval
+ - **Sentence-Transformers** – Embedding & reranking
+ - **Transformers (Hugging Face)** – Language model for generation
+ - **Hugging Face Datasets** – Mini Wikipedia corpus (`rag-datasets/rag-mini-wikipedia`)
+
+ ---
+
+ ## Models Used
+
+ | Purpose    | Model                                                                 |
+ |------------|-----------------------------------------------------------------------|
+ | Embedding  | `all-MiniLM-L6-v2`                                                    |
+ | Reranking  | `cross-encoder/ms-marco-MiniLM-L-6-v2`                                |
+ | Generation | `mistralai/Mistral-7B-Instruct-v0.2` *(optional)* or a smaller model  |
+
+ ---
+
+ ## 📦 Running Locally
+
+ To run the app locally:
+
+ ```bash
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/mini-rag-demo
+ cd mini-rag-demo
+ pip install -r requirements.txt
+ python app.py
+ ```
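
The four "What It Does" steps above can be checked without the Gradio UI or the generator. A minimal retrieval-and-rerank sketch using the same models as the app (the `passages` split and `passage` field are assumptions about the `text-corpus` config's layout, not stated in the README):

```python
# Sanity-check embed -> retrieve -> rerank in isolation.
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, CrossEncoder
import faiss

# Assumed layout: the "text-corpus" config exposes a "passages" split
# whose rows carry the text in a "passage" field.
passages = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus", split="passages[:200]")
corpus = [p["passage"] for p in passages]

embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(corpus)            # float32 numpy array, one row per passage
index = faiss.IndexFlatL2(embeddings.shape[1])  # exact L2 search
index.add(embeddings)

query = "Who was the first president of the United States?"
_, ids = index.search(embedder.encode([query]), 5)  # top-5 nearest passages
candidates = [corpus[i] for i in ids[0]]

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
scores = reranker.predict([[query, c] for c in candidates])
best = max(zip(scores, candidates), key=lambda pair: pair[0])[1]
print(best[:300])
```

If the printed passage is on-topic, the index and reranker are wired correctly; generation only adds a prompting step on top, as the app.py diff below shows.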
app.py
CHANGED
@@ -1,64 +1,60 @@
  import gradio as gr
- from
- )
- )
- if __name__ == "__main__":
-     demo.launch()
+ from datasets import load_dataset
+ from sentence_transformers import SentenceTransformer, CrossEncoder
+ import faiss
+ import torch
+ import numpy as np
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+ # Load corpus: the "text-corpus" config has a "passages" split with a "passage" field
+ print("Loading dataset...")
+ dataset = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus", split="passages[:1000]")
+ corpus = [item["passage"] for item in dataset]
+
+ # Embedding model
+ print("Encoding corpus...")
+ embedder = SentenceTransformer("all-MiniLM-L6-v2")
+ corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
+ corpus_embeddings_np = corpus_embeddings.cpu().numpy()
+
+ # FAISS index (exact L2 search over the embedding dimension)
+ index = faiss.IndexFlatL2(corpus_embeddings_np.shape[1])
+ index.add(corpus_embeddings_np)
+
+ # Reranker model
+ reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
+
+ # Generator (a local HF model; the Mistral checkpoint is gated and needs an authorized token)
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
+ model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", device_map="auto", torch_dtype=torch.float16)
+ generator = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150)
+
+ def rag_pipeline(query):
+     # Embed query
+     query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().numpy()
+
+     # Retrieve top-k candidates from FAISS
+     D, I = index.search(query_embedding, k=5)
+     retrieved_docs = [corpus[idx] for idx in I[0]]
+
+     # Rerank with the cross-encoder (higher score = more relevant)
+     rerank_pairs = [[query, doc] for doc in retrieved_docs]
+     scores = reranker.predict(rerank_pairs)
+     reranked_docs = [doc for _, doc in sorted(zip(scores, retrieved_docs), reverse=True)]
+
+     # Use the two best passages as context
+     context = "\n\n".join(reranked_docs[:2])
+     prompt = f"""Answer the following question using the provided context.\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""
+
+     # Generate, then keep only the text after the final "Answer:"
+     response = generator(prompt)[0]["generated_text"]
+     return response.split("Answer:")[-1].strip()
+
+ # Gradio UI
+ iface = gr.Interface(fn=rag_pipeline,
+                      inputs=gr.Textbox(lines=2, placeholder="Ask something..."),
+                      outputs="text",
+                      title="Mini RAG Wikipedia Demo",
+                      description="Retrieval-Augmented Generation on a small Wikipedia subset.")
+
+ iface.launch()
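
The README's model table allows "a smaller model" for generation, while the new app.py hardcodes the gated Mistral checkpoint. On a host without access to those weights, the generator block could be swapped for a small open seq2seq model; a sketch, where `google/flan-t5-base` is an assumed stand-in rather than part of this commit:

```python
# Sketch: swap the gated Mistral generator for a small open seq2seq model.
# "google/flan-t5-base" is an assumed stand-in, not part of this commit.
from transformers import pipeline

generator = pipeline("text2text-generation", model="google/flan-t5-base")

def generate_answer(prompt: str) -> str:
    # Seq2seq pipelines return only the completion, not the prompt, so the
    # split("Answer:") step in rag_pipeline() stays harmless: with no
    # "Answer:" in the output, split() returns the whole string unchanged.
    return generator(prompt, max_new_tokens=150)[0]["generated_text"].strip()
```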
requirements.txt
CHANGED
@@ -1 +1,6 @@
-
+ torch
+ transformers
+ datasets
+ sentence-transformers
+ gradio
+ faiss-gpu
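
One dependency note: the `faiss-gpu` wheel targets CUDA hosts; on a CPU-only machine the usual substitute is `faiss-cpu`, and `import faiss` in app.py works unchanged with either package. That swap is a suggestion, not part of this commit.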