Spaces:

bs-modeling-metadata
/

metadata_playground

Runtime error

manandey committed on Apr 25, 2023

Commit

9c9d8ef

•

1 Parent(s): e0a6493

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ def generate(html, entity, website_desc, datasource, year, month, title, prompt)
             entity_text = entity_text + " |" + ent + "|"
         entity_text = "entity ||| <ENTITY_CHAIN>"  + entity_text +  " </ENTITY_CHAIN> "
     else:
-        entity_text = ""
     website_desc_text = "Website Description: " + website_desc + " | " if website_desc != "" else ""
     datasource_text = "Datasource: " + datasource + " | " if datasource != "" else ""
     year_text = "Year: " + year + " | " if year != "" else ""
@@ -26,11 +26,12 @@ def generate(html, entity, website_desc, datasource, year, month, title, prompt)
     final_prompt = html_text + year_text + month_text + website_desc_text + title_text + datasource_text + entity_text + prompt
     model = AutoModelForCausalLM.from_pretrained("bs-modeling-metadata/checkpoints_all_04_23", subfolder="checkpoint-30000step")
-    tokenizer = AutoTokenizer.from_pretrained("bs-modeling-metadata/checkpoints_all_04_23", subfolder="tokenizer")
     inputs = tokenizer(final_prompt, return_tensors="pt")
-    outputs = model.generate(**inputs, max_new_tokens=128)
     return tokenizer.batch_decode(outputs, skip_special_tokens=True)

             entity_text = entity_text + " |" + ent + "|"
         entity_text = "entity ||| <ENTITY_CHAIN>"  + entity_text +  " </ENTITY_CHAIN> "
     else:
+        entity_text = "||| "
     website_desc_text = "Website Description: " + website_desc + " | " if website_desc != "" else ""
     datasource_text = "Datasource: " + datasource + " | " if datasource != "" else ""
     year_text = "Year: " + year + " | " if year != "" else ""
     final_prompt = html_text + year_text + month_text + website_desc_text + title_text + datasource_text + entity_text + prompt
     model = AutoModelForCausalLM.from_pretrained("bs-modeling-metadata/checkpoints_all_04_23", subfolder="checkpoint-30000step")
+    tokenizer = AutoTokenizer.from_pretrained("bs-modeling-metadata/checkpoints_all_04_23", subfolder="tokenizer", add_prefix_space=True)
+    bad_words_ids = tokenizer(["<ENTITY_CHAIN>", " </ENTITY_CHAIN> "]).input_ids
     inputs = tokenizer(final_prompt, return_tensors="pt")
+    outputs = model.generate(**inputs, max_new_tokens=128, bad_words_ids=bad_words_ids)
     return tokenizer.batch_decode(outputs, skip_special_tokens=True)