Spaces:
Sleeping
Sleeping
pham thuy tien
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ def cleaning_input(input_text):
|
|
36 |
text = re.sub(r'\s+', ' ', text)
|
37 |
return text
|
38 |
|
39 |
-
def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
|
40 |
""" Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
|
41 |
Args:
|
42 |
logits: logits distribution shape (vocabulary size)
|
@@ -66,7 +66,7 @@ def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')
|
|
66 |
logits[indices_to_remove] = filter_value
|
67 |
return logits
|
68 |
|
69 |
-
def sample_seq(model, context, length, device, temperature=1, top_k=
|
70 |
""" Generates a sequence of tokens
|
71 |
Args:
|
72 |
model: gpt/gpt2 model
|
@@ -109,7 +109,7 @@ def gene(t,a):
|
|
109 |
model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
|
110 |
if a=="vi":
|
111 |
model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2vi_CNNvi_3k")
|
112 |
-
generated_text = sample_seq(model, article, 50, torch.device('cpu'), temperature=1, top_k=
|
113 |
generated_text = generated_text[0, len(article):].tolist()
|
114 |
text = tokenizer.convert_ids_to_tokens(generated_text, skip_special_tokens=True)
|
115 |
text = tokenizer.convert_tokens_to_string(text)
|
|
|
36 |
text = re.sub(r'\s+', ' ', text)
|
37 |
return text
|
38 |
|
39 |
+
def top_k_top_p_filtering(logits, top_k=2, top_p=0.0, filter_value=-float('Inf')):
|
40 |
""" Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
|
41 |
Args:
|
42 |
logits: logits distribution shape (vocabulary size)
|
|
|
66 |
logits[indices_to_remove] = filter_value
|
67 |
return logits
|
68 |
|
69 |
+
def sample_seq(model, context, length, device, temperature=1, top_k=2, top_p=0.0):
|
70 |
""" Generates a sequence of tokens
|
71 |
Args:
|
72 |
model: gpt/gpt2 model
|
|
|
109 |
model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
|
110 |
if a=="vi":
|
111 |
model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2vi_CNNvi_3k")
|
112 |
+
generated_text = sample_seq(model, article, 50, torch.device('cpu'), temperature=1, top_k=2, top_p=0.5)
|
113 |
generated_text = generated_text[0, len(article):].tolist()
|
114 |
text = tokenizer.convert_ids_to_tokens(generated_text, skip_special_tokens=True)
|
115 |
text = tokenizer.convert_tokens_to_string(text)
|