bainskarman committed on
Commit
b5ac3f0
·
verified ·
1 Parent(s): 6b70492

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +25 -21
model.py CHANGED
@@ -1,8 +1,21 @@
1
  from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import os
3
  import torch
4
- if torch.cuda.is_available():
5
- model.to('cuda')
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def modelFeedback(ats_score, resume_data, job_description):
7
  """
8
  Generate ATS feedback by utilizing a pre-configured pipeline.
@@ -32,28 +45,19 @@ def modelFeedback(ats_score, resume_data, job_description):
32
  #### Job Description: {job_description}
33
  """
34
 
35
- # Load the tokenizer and model
36
- huggingface_token = os.environ.get("KEY2")
37
- tokenizer = AutoTokenizer.from_pretrained(
38
- "meta-llama/Llama-3.2-1B",
39
- use_auth_token=huggingface_token
40
- )
41
- model = AutoModelForCausalLM.from_pretrained(
42
- "meta-llama/Llama-3.2-1B",
43
- use_auth_token=huggingface_token
44
- )
45
-
46
  try:
47
  # Tokenize the input
48
- input_ids = tokenizer.encode(input_prompt, return_tensors="pt")
49
 
50
- # Generate the output
51
- output = model.generate(
52
- input_ids,
53
- max_length=1500,
54
- temperature=0.01,
55
- pad_token_id=tokenizer.eos_token_id # Ensure padding works properly
56
- )
 
 
57
 
58
  # Decode the output
59
  response_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
1
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import torch

# Select GPU when available; generation falls back to CPU otherwise.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the tokenizer and model once at module import so repeated
# modelFeedback() calls reuse them instead of reloading the weights.
# NOTE(review): assumes the "KEY2" env var holds a Hugging Face access
# token; os.environ.get returns None when it is unset — confirm callers
# set it, otherwise gated-model download will fail.
huggingface_token = os.environ.get("KEY2")

tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    # `use_auth_token` is deprecated in transformers (>= 4.32) and removed
    # in newer releases; `token` is the supported parameter.
    token=huggingface_token,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    token=huggingface_token,
).to(device)
19
  def modelFeedback(ats_score, resume_data, job_description):
20
  """
21
  Generate ATS feedback by utilizing a pre-configured pipeline.
 
45
  #### Job Description: {job_description}
46
  """
47
 
 
 
 
 
 
 
 
 
 
 
 
48
  try:
49
  # Tokenize the input
50
+ input_ids = tokenizer.encode(input_prompt, return_tensors="pt").to(device)
51
 
52
+ # Disable gradient calculation for faster inference
53
+ with torch.no_grad():
54
+ # Generate the output
55
+ output = model.generate(
56
+ input_ids,
57
+ max_length=1500,
58
+ temperature=0.01,
59
+ pad_token_id=tokenizer.eos_token_id # Ensure padding works properly
60
+ )
61
 
62
  # Decode the output
63
  response_text = tokenizer.decode(output[0], skip_special_tokens=True)