pham thuy tien committed on
Commit
458e3a7
·
verified ·
1 Parent(s): e2db9fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -36,7 +36,7 @@ def cleaning_input(input_text):
36
  text = re.sub(r'\s+', ' ', text)
37
  return text
38
 
39
- def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
40
  """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
41
  Args:
42
  logits: logits distribution shape (vocabulary size)
@@ -66,7 +66,7 @@ def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')
66
  logits[indices_to_remove] = filter_value
67
  return logits
68
 
69
- def sample_seq(model, context, length, device, temperature=1, top_k=0, top_p=0.0):
70
  """ Generates a sequence of tokens
71
  Args:
72
  model: gpt/gpt2 model
@@ -109,7 +109,7 @@ def gene(t,a):
109
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
110
  if a=="vi":
111
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2vi_CNNvi_3k")
112
- generated_text = sample_seq(model, article, 50, torch.device('cpu'), temperature=1, top_k=10, top_p=0.5)
113
  generated_text = generated_text[0, len(article):].tolist()
114
  text = tokenizer.convert_ids_to_tokens(generated_text, skip_special_tokens=True)
115
  text = tokenizer.convert_tokens_to_string(text)
 
36
  text = re.sub(r'\s+', ' ', text)
37
  return text
38
 
39
+ def top_k_top_p_filtering(logits, top_k=2, top_p=0.0, filter_value=-float('Inf')):
40
  """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
41
  Args:
42
  logits: logits distribution shape (vocabulary size)
 
66
  logits[indices_to_remove] = filter_value
67
  return logits
68
 
69
+ def sample_seq(model, context, length, device, temperature=1, top_k=2, top_p=0.0):
70
  """ Generates a sequence of tokens
71
  Args:
72
  model: gpt/gpt2 model
 
109
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
110
  if a=="vi":
111
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2vi_CNNvi_3k")
112
+ generated_text = sample_seq(model, article, 50, torch.device('cpu'), temperature=1, top_k=2, top_p=0.5)
113
  generated_text = generated_text[0, len(article):].tolist()
114
  text = tokenizer.convert_ids_to_tokens(generated_text, skip_special_tokens=True)
115
  text = tokenizer.convert_tokens_to_string(text)