bainskarman committed
Commit 3abe7d4 · verified · 1 Parent(s): b5ac3f0

Update model.py

Files changed (1)
  1. model.py +13 -27
model.py CHANGED
@@ -1,20 +1,16 @@
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import os
  import torch
+ from transformers import pipeline

- # Check if CUDA is available for faster inference
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ # Define model id
+ model_id = "meta-llama/Llama-3.2-1B"

- # Load the tokenizer and model once, outside of the function
- huggingface_token = os.environ.get("KEY2")
- tokenizer = AutoTokenizer.from_pretrained(
-     "meta-llama/Llama-3.2-1B",
-     use_auth_token=huggingface_token
+ # Create pipeline for text generation with bfloat16 precision and device auto-placement
+ pipe = pipeline(
+     "text-generation",
+     model=model_id,
+     torch_dtype=torch.bfloat16,
+     device_map="auto"
  )
- model = AutoModelForCausalLM.from_pretrained(
-     "meta-llama/Llama-3.2-1B",
-     use_auth_token=huggingface_token
- ).to(device)

  def modelFeedback(ats_score, resume_data, job_description):
      """
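For context on the new loading path: torch_dtype=torch.bfloat16 keeps the weights in half precision, and device_map="auto" hands device placement to Accelerate, replacing the manual .to(device) call from the old version. A minimal sketch of how the constructed pipeline behaves, assuming the gated meta-llama checkpoint is accessible; the prompt and token budget below are illustrative only:

import torch
from transformers import pipeline

# Same construction as in the diff: half-precision weights, automatic device placement.
# device_map="auto" requires the accelerate package to be installed.
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",  # gated repo; assumes an authorized HF token is configured
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# A text-generation pipeline returns a list of dicts, one per returned sequence;
# the full text (prompt + continuation) sits under the 'generated_text' key.
result = pipe("Give one concrete resume improvement tip:", max_new_tokens=40, num_return_sequences=1)
print(result[0]["generated_text"])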
 
 
 
 
@@ -46,21 +42,11 @@ def modelFeedback(ats_score, resume_data, job_description):
      """

      try:
-         # Tokenize the input
-         input_ids = tokenizer.encode(input_prompt, return_tensors="pt").to(device)
+         # Generate the feedback using the pre-configured pipeline
+         response = pipe(input_prompt, max_length=1500, num_return_sequences=1)

-         # Disable gradient calculation for faster inference
-         with torch.no_grad():
-             # Generate the output
-             output = model.generate(
-                 input_ids,
-                 max_length=1500,
-                 temperature=0.01,
-                 pad_token_id=tokenizer.eos_token_id  # Ensure padding works properly
-             )
-
-         # Decode the output
-         response_text = tokenizer.decode(output[0], skip_special_tokens=True)
+         # Extract the generated text
+         response_text = response[0]['generated_text']
          return response_text
      except Exception as e:
          print(f"Error during generation: {e}")