jerryzh168 committed
Commit a204b4f · verified · 1 Parent(s): 384e9fa

Update README.md

Files changed (1):
  1. README.md +19 -2
README.md CHANGED
@@ -52,12 +52,29 @@ tokenizer.push_to_hub(save_to)
 
 # Manual Testing
 prompt = "Hey, are you conscious? Can you talk to me?"
-inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+messages = [
+    {
+        "role": "system",
+        "content": "",
+    },
+    {"role": "user", "content": prompt},
+]
+templated_prompt = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+)
+print("Prompt:", prompt)
+print("Templated prompt:", templated_prompt)
+inputs = tokenizer(
+    templated_prompt,
+    return_tensors="pt",
+).to("cuda")
 generated_ids = quantized_model.generate(**inputs, max_new_tokens=128)
 output_text = tokenizer.batch_decode(
     generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
 )
-print(output_text)
+print("Response:", output_text[0][len(prompt):])
 
 # Local Benchmark
 import torch.utils.benchmark as benchmark
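
The change replaces direct tokenization of the raw prompt with the tokenizer's chat template, so the model sees the role markers and special tokens it was fine-tuned with, and the final print trims the echoed prompt from the decoded output. A minimal, self-contained sketch of the same flow follows; the checkpoint name is a stand-in (the README assumes the repo's own quantized model), and slicing the response at the token level is an alternative to the string slice used above, not the commit's method.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # stand-in chat checkpoint, not this repo's model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto").to("cuda")

prompt = "Hey, are you conscious? Can you talk to me?"
messages = [
    {"role": "system", "content": ""},
    {"role": "user", "content": prompt},
]
# Render role markers and special tokens into a string; tokenize in a second step.
templated_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = tokenizer(templated_prompt, return_tensors="pt").to("cuda")
generated_ids = model.generate(**inputs, max_new_tokens=128)

# Slicing generated_ids by the input length drops the echoed prompt exactly;
# string slicing by len(prompt) can be off once template tokens are prepended.
new_tokens = generated_ids[:, inputs["input_ids"].shape[1]:]
print("Response:", tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0])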