Add token
main.py
CHANGED
@@ -4,23 +4,26 @@ from huggingface_hub import InferenceClient
 import uvicorn
 from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import os
 
+token = os.getenv("HUGGINGFACE_TOKEN")
 
 #Load pre-trained tokenizer and model (Works)
 model_name = "mistralai/Mistral-7B-Instruct-v0.1"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
 tokenizer.pad_token = tokenizer.eos_token
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     device_map="auto",
-    torch_dtype="auto"
+    torch_dtype="auto",
+    use_auth_token=token
 )
 
 
 # Example usage: Generate text
 prompt = "<s>[INST] What's the capital of France? [/INST]"
 
-inputs = tokenizer(prompt, return_tensors="pt", padding=True, return_attention_mask=True
+inputs = tokenizer(prompt, return_tensors="pt", padding=True, return_attention_mask=True).to(model.device)
 outputs = model.generate(
     **inputs,
     max_new_tokens=100,
@@ -60,7 +63,7 @@ async def generate_text(item: Item):
     # logging.info("Response generated")
 
     inp =f"<s>[INST] {item.prompt} [/INST]"
-    inputs = tokenizer(inp, return_tensors="pt", padding=True, return_attention_mask=True
+    inputs = tokenizer(inp, return_tensors="pt", padding=True, return_attention_mask=True).to(model.device)
 
 
     # input_ids = tokenizer.encode(item.prompt, return_tensors="pt")
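For reviewers, a self-contained sketch of the load-and-generate path this change produces. It assumes the HUGGINGFACE_TOKEN secret is configured on the Space; note also that recent transformers releases deprecate the use_auth_token= keyword in favor of token=, so the sketch uses the newer spelling.

# Minimal sketch of the updated flow (assumes the HUGGINGFACE_TOKEN
# secret is set on the Space; token= is the non-deprecated spelling
# of use_auth_token= in recent transformers releases).
import os

from transformers import AutoModelForCausalLM, AutoTokenizer

token = os.getenv("HUGGINGFACE_TOKEN")

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
    token=token,
)

# Mistral-Instruct prompt format, as used in the diff above.
prompt = "<s>[INST] What's the capital of France? [/INST]"

# Move the encoded inputs to the model's device, matching the
# .to(model.device) fix this commit applies in both call sites.
inputs = tokenizer(
    prompt, return_tensors="pt", padding=True, return_attention_mask=True
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))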