pham thuy tien committed on
Commit
458e3a7
·
verified ·
1 Parent(s): e2db9fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -36,7 +36,7 @@ def cleaning_input(input_text):
36
  text = re.sub(r'\s+', ' ', text)
37
  return text
38
 
39
- def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
40
  """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
41
  Args:
42
  logits: logits distribution shape (vocabulary size)
@@ -66,7 +66,7 @@ def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')
66
  logits[indices_to_remove] = filter_value
67
  return logits
68
 
69
- def sample_seq(model, context, length, device, temperature=1, top_k=0, top_p=0.0):
70
  """ Generates a sequence of tokens
71
  Args:
72
  model: gpt/gpt2 model
@@ -109,7 +109,7 @@ def gene(t,a):
109
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
110
  if a=="vi":
111
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2vi_CNNvi_3k")
112
- generated_text = sample_seq(model, article, 50, torch.device('cpu'), temperature=1, top_k=10, top_p=0.5)
113
  generated_text = generated_text[0, len(article):].tolist()
114
  text = tokenizer.convert_ids_to_tokens(generated_text, skip_special_tokens=True)
115
  text = tokenizer.convert_tokens_to_string(text)
 
36
  text = re.sub(r'\s+', ' ', text)
37
  return text
38
 
39
+ def top_k_top_p_filtering(logits, top_k=2, top_p=0.0, filter_value=-float('Inf')):
40
  """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
41
  Args:
42
  logits: logits distribution shape (vocabulary size)
 
66
  logits[indices_to_remove] = filter_value
67
  return logits
68
 
69
+ def sample_seq(model, context, length, device, temperature=1, top_k=2, top_p=0.0):
70
  """ Generates a sequence of tokens
71
  Args:
72
  model: gpt/gpt2 model
 
109
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
110
  if a=="vi":
111
  model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2vi_CNNvi_3k")
112
+ generated_text = sample_seq(model, article, 50, torch.device('cpu'), temperature=1, top_k=2, top_p=0.5)
113
  generated_text = generated_text[0, len(article):].tolist()
114
  text = tokenizer.convert_ids_to_tokens(generated_text, skip_special_tokens=True)
115
  text = tokenizer.convert_tokens_to_string(text)