tahsinhasem committed
Commit 19e6be5 · verified · 1 Parent(s): ed23f7d
Files changed (1):
  1. main.py +7 -4
main.py CHANGED
@@ -4,23 +4,26 @@ from huggingface_hub import InferenceClient
 import uvicorn
 from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import os
 
+token = os.getenv("HUGGINGFACE_TOKEN")
 
 # Load pre-trained tokenizer and model (Works)
 model_name = "mistralai/Mistral-7B-Instruct-v0.1"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
 tokenizer.pad_token = tokenizer.eos_token
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     device_map="auto",
-    torch_dtype="auto"
+    torch_dtype="auto",
+    use_auth_token=token
 )
 
 
 # Example usage: Generate text
 prompt = "<s>[INST] What's the capital of France? [/INST]"
 
-inputs = tokenizer(prompt, return_tensors="pt", padding=True, return_attention_mask=True, ).to(model.device)
+inputs = tokenizer(prompt, return_tensors="pt", padding=True, return_attention_mask=True).to(model.device)
 outputs = model.generate(
     **inputs,
     max_new_tokens=100,
@@ -60,7 +63,7 @@ async def generate_text(item: Item):
 # logging.info("Response generated")
 
 inp = f"<s>[INST] {item.prompt} [/INST]"
-inputs = tokenizer(inp, return_tensors="pt", padding=True, return_attention_mask=True, ).to(model.device)
+inputs = tokenizer(inp, return_tensors="pt", padding=True, return_attention_mask=True).to(model.device)
 
 
 # input_ids = tokenizer.encode(item.prompt, return_tensors="pt")
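
For context, the change amounts to authenticating both from_pretrained calls with a Hub access token read from the environment, which is needed for gated or private repositories. Below is a minimal, self-contained sketch of the same pattern. Assumptions worth flagging: recent transformers releases deprecate the use_auth_token= argument used in this commit in favor of token=; the HUGGINGFACE_TOKEN variable name is this app's own choice (the Hub libraries conventionally read HF_TOKEN); and the decode step does not appear in the diff, so that part is illustrative.

# Sketch only: the same token-authenticated load, using the current
# `token=` argument (`use_auth_token=` is deprecated on recent versions).
import os

from transformers import AutoModelForCausalLM, AutoTokenizer

token = os.getenv("HUGGINGFACE_TOKEN")  # app-specific name; Hub tools read HF_TOKEN

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
tokenizer.pad_token = tokenizer.eos_token  # Mistral ships no pad token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",   # place weights on available GPU(s)/CPU
    torch_dtype="auto",  # keep the checkpoint's native dtype
    token=token,
)

# Same request path as the app: Mistral [INST] prompt in, new tokens out.
prompt = "<s>[INST] What's the capital of France? [/INST]"
inputs = tokenizer(
    prompt, return_tensors="pt", padding=True, return_attention_mask=True
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode only the newly generated tokens (this step is not shown in the diff).
new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))

Setting pad_token = eos_token together with an explicit attention mask is the standard workaround for decoder-only checkpoints that define no padding token; without it, generate has to guess where padded prompts end and warns accordingly.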