jnanliu committed
Commit 7b5f5ba · verified · Parent(s): 82bb9b5

Update README.md

Files changed (1):
  1. README.md +6 −6

README.md CHANGED
```diff
@@ -25,7 +25,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 # load the model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
     'jnanliu/LiveMath-Judge',
-    device_map="auto",
+    device_map='auto',
     torch_dtype=torch.bfloat16,
 )
 tokenizer = AutoTokenizer.from_pretrained(
```
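For context, here is the load step as it reads after this change, assembled into a self-contained sketch. Only the `transformers` import is visible in the hunk header; `import torch` and the tokenizer's repo id are assumptions based on the surrounding code, and `device_map='auto'` requires the `accelerate` package.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# load the model and tokenizer
# device_map='auto' places layers across available GPUs/CPU (requires `accelerate`)
model = AutoModelForCausalLM.from_pretrained(
    'jnanliu/LiveMath-Judge',
    device_map='auto',
    torch_dtype=torch.bfloat16,
)
# assumption: the call truncated in the hunk loads the tokenizer from the same repo
tokenizer = AutoTokenizer.from_pretrained('jnanliu/LiveMath-Judge')
```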
```diff
@@ -66,12 +66,12 @@ Analysis:
 conversations = [
     {'role': 'user', 'content': prompt.format(question=question, gold_answer=golden_answer, answer=generated_answer)}
 ]
-inputs = tokenizer.apply_chat_template(conversations, return_tensors="pt")
+inputs = tokenizer.apply_chat_template(conversations, return_tensors='pt')
 
 # do inference
 pred = model.generate(
-    input_ids=inputs["input_ids"].to(model.device),
-    attention_mask=inputs["attention_mask"].to(model.device),
+    input_ids=inputs['input_ids'].to(model.device),
+    attention_mask=inputs['attention_mask'].to(model.device),
     num_return_sequences=1,
 )[0].cpu().tolist()
 response = tokenizer.decode(pred, skip_special_tokens=True)
```
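One caveat worth noting on this hunk: the quote-style change is cosmetic, but on both sides of the diff `apply_chat_template(..., return_tensors='pt')` returns a bare tensor of input IDs in recent `transformers` versions, so the later `inputs['input_ids']` indexing would fail. A minimal sketch of the inference step that actually runs, assuming the `conversations` list from the hunk, passes `return_dict=True` (and typically `add_generation_prompt=True`):

```python
# tokenize the conversation; return_dict=True yields a mapping with
# 'input_ids' and 'attention_mask' instead of a bare tensor
inputs = tokenizer.apply_chat_template(
    conversations,
    add_generation_prompt=True,  # open the assistant turn for generation
    return_dict=True,
    return_tensors='pt',
)

# do inference
pred = model.generate(
    input_ids=inputs['input_ids'].to(model.device),
    attention_mask=inputs['attention_mask'].to(model.device),
    num_return_sequences=1,
)[0].cpu().tolist()
response = tokenizer.decode(pred, skip_special_tokens=True)
```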
```diff
@@ -84,8 +84,8 @@ response = tokenizer.decode(pred, skip_special_tokens=True)
 Following are the mG-Pass@16 results for Qwen2.5-72B-Instruct-as-Judge and LiveMath-Judge-as-Judge across 10 models.
 | model | Qwen2.5-72B-Instruct | LiveMath-Judge |
 | -- | -- | -- |
-| Qwen2.5-7B-Instruct | 26.45 | 26.32 |
-| Qwen2.5-Math-7B-Instruct | 37.91 | 38.01 |
+| Qwen2.5-7B-Instruct | 26.45 | 28.17 |
+| Qwen2.5-Math-7B-Instruct | 37.91 | 39.54 |
 | Llama-3.1-8B-Instruct | 10.43 | 10.41 |
 | Llama-3.1-70B-Instruct | 21.37 | 22.12 |
 | Llama-3.3-70B-Instruct | 27.36 | 27.23 |
```
 