Commit 7bef3fd · Update README.md
Parent(s): 8fed7be

README.md CHANGED
@@ -28,6 +28,9 @@ This model is [**LlaMa2-7b**](https://huggingface.co/meta-llama/Llama-2-7b) whi
 # You can load the LlaMa2-CodeGen model on google colab.
 
 
+
+
+
 ### Example
 ```py
 
@@ -46,9 +49,10 @@ model = PeftModel.from_pretrained(model, peft_model_id)
 
 
 
+
 def create_prompt(instruction):
-    system = "You are a coding assistant that will help the user to resolve the following instruction
-    instruction = "
+    system = "You are using the Llam2-CodeGen model, a coding assistant that will help the user to resolve the following instruction:\n"
+    instruction = "### Input: " + instruction
     return system + "\n" + instruction + "\n\n" + "### Response:" + "\n"
 
 def generate(
@@ -62,9 +66,10 @@ def generate(
 ):
     prompt = create_prompt(instruction)
     print(prompt)
-    inputs = tokenizer(prompt, return_tensors="pt")
-    input_ids = inputs["input_ids"].to("cuda")
-    attention_mask = inputs["attention_mask"].to("cuda")
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    #input_ids = inputs["input_ids"].to("cuda")
+    #attention_mask = inputs["attention_mask"].to("cuda")
+
     generation_config = GenerationConfig(
         temperature=temperature,
         top_p=top_p,
@@ -74,17 +79,36 @@
     )
     with torch.no_grad():
        generation_output = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
+            #input_ids=input_ids,
+            #attention_mask=attention_mask,
+            **inputs,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
            early_stopping=True
        )
-
-
-
+
+
+
+    generated_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    stop_output = "### Input"
+    gen_response = (generated_response.split(stop_output))[0]
+
+
+    #s = generation_output.sequences[0]
+    #output = tokenizer.decode(s, skip_special_tokens=True)
+    #stop_output = "### Input"
+
+    #gen_response = (output.split(stop_output))[0]
+
+
+    #return output.split("### Response:")[1].lstrip("\n")
+    return gen_response
+
+
+
+
 
 
 instruction = """
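Net effect of the commit: create_prompt now wraps the user instruction in an "### Input:" / "### Response:" frame, the prompt is tokenized once and moved to the GPU as a whole BatchEncoding that is passed to model.generate via **inputs (replacing the separate input_ids / attention_mask arguments), and the decoded output is trimmed at the next "### Input" marker before being returned. Note that the added code decodes outputs[0], a name that is not defined in the example; the commented-out lines suggest generation_output.sequences[0] was meant. The sketch below shows how the example would run after this commit under that assumption; the model-loading lines, the placeholder adapter id, the simplified generate signature, and the default sampling values are illustrative assumptions, not the verbatim README.

```py
# Hedged sketch of the README example as it stands after this commit.
# Assumptions (not from the diff itself): how the base model and adapter are
# loaded, the placeholder adapter repo id, the simplified generate() signature,
# and the default sampling values.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

base_model_id = "meta-llama/Llama-2-7b"     # base model linked in the README
peft_model_id = "<LlaMa2-CodeGen-adapter>"  # placeholder; use the repo id from the README

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
model = PeftModel.from_pretrained(model, peft_model_id)  # as in the diff's hunk context

def create_prompt(instruction):
    # Prompt frame introduced by this commit (text kept verbatim).
    system = "You are using the Llam2-CodeGen model, a coding assistant that will help the user to resolve the following instruction:\n"
    instruction = "### Input: " + instruction
    return system + "\n" + instruction + "\n\n" + "### Response:" + "\n"

def generate(instruction, temperature=0.7, top_p=0.9, max_new_tokens=256):
    prompt = create_prompt(instruction)
    # Tokenize once and move the whole BatchEncoding to the GPU; `**inputs`
    # then supplies input_ids and attention_mask to generate() together.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    generation_config = GenerationConfig(temperature=temperature, top_p=top_p)
    with torch.no_grad():
        generation_output = model.generate(
            **inputs,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )

    # The commit decodes `outputs[0]`; the generated token ids actually live in
    # `generation_output.sequences`, so that is what is decoded here.
    generated_response = tokenizer.decode(
        generation_output.sequences[0], skip_special_tokens=True
    )
    # Trim at the next "### Input" marker so only this response is returned.
    stop_output = "### Input"
    return generated_response.split(stop_output)[0]

# Example call (hypothetical instruction):
print(generate("Write a Python function that checks whether a number is prime."))
```

Passing the whole encoding with **inputs keeps input_ids and attention_mask in sync, which is why the commit comments out the two separate .to("cuda") lines instead of keeping them.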