Spaces:
Runtime error
Runtime error
Update backend/app/train.py
Browse files
- backend/app/train.py +1 -1
backend/app/train.py
CHANGED
|
@@ -46,7 +46,7 @@ def load_and_prepare_dataset():
|
|
| 46 |   # === 2. Tokenization ===
| 47 |   def tokenize_dataset(dataset, tokenizer):
| 48 |       print(f"🔠 Tokenizing {len(dataset)} samples...")
| 49 | -     tokenized = dataset.map(lambda x: tokenizer(x["text"], truncation=True), batched=True)
| 50 |       print(f"✅ Tokenized: now has keys {tokenized.column_names}")
| 51 |       return tokenized
| 52 |
|
|
|
|
| 46 |   # === 2. Tokenization ===
| 47 |   def tokenize_dataset(dataset, tokenizer):
| 48 |       print(f"🔠 Tokenizing {len(dataset)} samples...")
| 49 | +     tokenized = dataset.map(lambda x: tokenizer(x["text"], truncation=True, max_length=128), batched=True)
| 50 |       print(f"✅ Tokenized: now has keys {tokenized.column_names}")
| 51 |       return tokenized
| 52 |
|