Update README.md
README.md CHANGED
````diff
@@ -42,7 +42,7 @@ generate_text = pipeline(
 )
 
 res = generate_text(
-    "
+    "日本で一番高い山は富士山ですが、二番目に高い山は?",
     min_new_tokens=2,
     max_new_tokens=256,
     do_sample=False,
````
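Taken together with the `generate_text = pipeline(` context in the hunk header and the `print(res[0]["generated_text"])` context in the next hunk, the updated example runs end to end roughly as sketched below. This is a sketch, not the README verbatim: every argument except the prompt and the three generation parameters visible in this diff is an assumption.

```python
import torch
from transformers import pipeline

# Assumptions: dtype and device placement are not visible in this diff;
# trust_remote_code=True is needed to load the model's custom pipeline class.
generate_text = pipeline(
    model="yukismd/JapaneseQuizChatbot_v1",
    torch_dtype="auto",
    trust_remote_code=True,
    device_map={"": "cuda:0"},
)

# The call as it reads after this change.
res = generate_text(
    "日本で一番高い山は富士山ですが、二番目に高い山は?",
    min_new_tokens=2,
    max_new_tokens=256,
    do_sample=False,
)
print(res[0]["generated_text"])
```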
````diff
@@ -57,11 +57,11 @@ print(res[0]["generated_text"])
 You can print a sample prompt after the preprocessing step to see how it is fed to the tokenizer:
 
 ```python
-print(generate_text.preprocess("
+print(generate_text.preprocess("日本で一番高い山は富士山ですが、二番目に高い山は?")["prompt_text"])
 ```
 
 ```bash
-<|prompt
+<|prompt|>日本で一番高い山は富士山ですが、二番目に高い山は?<|endoftext|><|answer|>
 ```
 
 Alternatively, if you prefer not to use `trust_remote_code=True`, you can download [h2oai_pipeline.py](h2oai_pipeline.py), store it alongside your notebook, and construct the pipeline yourself from the loaded model and tokenizer:
````
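The `bash` block above is just the question (it asks: "Japan's highest mountain is Mt. Fuji, but what is the second highest?") wrapped in the model's training-time template, so the same prompt string can be assembled by hand when not going through the pipeline. A tiny illustrative check, with hypothetical variable names:

```python
# Rebuild the preprocessed prompt shown in the bash block above by hand.
question = "日本で一番高い山は富士山ですが、二番目に高い山は?"
prompt_text = f"<|prompt|>{question}<|endoftext|><|answer|>"
assert prompt_text == (
    "<|prompt|>日本で一番高い山は富士山ですが、二番目に高い山は?<|endoftext|><|answer|>"
)
```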
````diff
@@ -85,7 +85,7 @@ model = AutoModelForCausalLM.from_pretrained(
 generate_text = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer)
 
 res = generate_text(
-    "
+    "日本で一番高い山は富士山ですが、二番目に高い山は?",
     min_new_tokens=2,
     max_new_tokens=256,
     do_sample=False,
````
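Pieced together from this hunk and its header context, the `trust_remote_code`-free variant reads roughly as follows. Only the lines visible in the diff are verbatim; the imports and the `from_pretrained` arguments are assumptions:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumes h2oai_pipeline.py was downloaded next to this script, as the
# paragraph above describes.
from h2oai_pipeline import H2OTextGenerationPipeline

model_name = "yukismd/JapaneseQuizChatbot_v1"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name)

generate_text = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer)

res = generate_text(
    "日本で一番高い山は富士山ですが、二番目に高い山は?",
    min_new_tokens=2,
    max_new_tokens=256,
    do_sample=False,
)
print(res[0]["generated_text"])
```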
````diff
@@ -106,7 +106,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name = "yukismd/JapaneseQuizChatbot_v1"  # either local folder or huggingface model name
 # Important: The prompt needs to be in the same format the model was trained with.
 # You can find an example prompt in the experiment logs.
-prompt = "<|prompt
+prompt = "<|prompt|>日本で一番高い山は富士山ですが、二番目に高い山は?<|endoftext|><|answer|>"
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 model = AutoModelForCausalLM.from_pretrained(model_name)
````
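The hunk stops at loading the model, so for completeness here is a hypothetical continuation that feeds the formatted `prompt` through `generate` and decodes only the newly generated tokens. The generation parameters are copied from the pipeline examples above; the rest is assumption:

```python
# Tokenize the template-formatted prompt and generate an answer.
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    **inputs,
    min_new_tokens=2,
    max_new_tokens=256,
    do_sample=False,
)

# Decode only the tokens produced after the prompt.
answer = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:],
    skip_special_tokens=True,
)
print(answer)
```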