Nicolai Berk committed on
Commit
456bc55
·
1 Parent(s): cdedfb1

Fix data loader

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -8,8 +8,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
8
 
9
  # Load corpus
10
  print("Loading dataset...")
11
- dataset = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus", split="train[:1000]")
12
- corpus = [f"{item['title']}\n{item['text']}" for item in dataset]
13
 
14
  # Embedding model
15
  print("Encoding corpus...")
 
8
 
9
  # Load corpus
10
  print("Loading dataset...")
11
+ dataset = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus")
12
+ corpus = [item for item in dataset["passages"]]
13
 
14
  # Embedding model
15
  print("Encoding corpus...")