# Spaces status when this file was captured: Build error
import gradio as gr | |
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, StoppingCriteriaList | |
import os | |
import torch | |
# Checkpoint identifier handed to the `from_pretrained` loaders.
model_name = "EleutherAI/polyglot-ko-1.3b"

# Display settings for the demo page.
title = "Polyglot(Korean) Demo"
theme = "darkgrass"
# Strings the model is not allowed to emit; each is tokenized and handed to
# `generate(bad_words_ids=...)` by `predict`.
bad_words = [
    '...',
    '....',
    # "(omitted)" -- restored from mis-decoded UTF-8; the garbled form could
    # never match text produced by the Korean model.
    '(중략)',
    'http',
]
# Blurb for the demo page: "A demo page showcasing the polyglot (1.3B parameter
# size) Korean model."
# NOTE(review): the Korean text was reconstructed from mis-decoded UTF-8 --
# confirm against the original file.
description = "polyglot (1.3B 파라미터 사이즈) 한국어 모델을 시연하는 데모페이지 입니다."
# HTML footer linking to the upstream Polyglot repository.
article = "<p style='text-align: center'><a href='https://github.com/EleutherAI/polyglot' target='_blank'>Polyglot: Large Language Models of Well-balanced Competence in Multi-languages</a></p>"
examples = [ | |
["CPU์ GPU์ ์ฐจ์ด๋,"], | |
["์ง๋ฌธ: ์ฐํฌ๋ผ์ด๋ ์ ์์ด ์ธ๊ณ3์ฐจ๋์ ์ผ๋ก ํ์ ์ด ๋ ๊น์? \n๋ต๋ณ:"], | |
["2040๋ ๋ฏธ๊ตญ์, "] | |
] | |
# Load the tokenizer and causal-LM weights named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()  # inference only: switch the module to evaluation mode

# Ready-made text-generation pipeline built on the same model and tokenizer.
pipe = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    eos_token_id=tokenizer.eos_token_id,
)
def predict(text):
    """Continue *text* with the causal LM and return the decoded output.

    Args:
        text: Prompt string coming from the UI.

    Returns:
        The first (only) sequence returned by ``model.generate``, decoded
        with the tokenizer, or ``""`` when *text* is empty.
    """
    if not text:
        # Nothing to condition on; skip generation entirely.
        return ""

    # Documented way to build `bad_words_ids`: tokenize the phrases without
    # special tokens so only the phrase's own ids are banned.
    banned_ids = tokenizer(bad_words, add_special_tokens=False).input_ids

    # Fall back to EOS for padding when the tokenizer defines no pad token.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    with torch.no_grad():
        encoded = tokenizer(text, return_tensors="pt")
        # Sample up to 64 new tokens; generation stops early once the EOS
        # token (<|endoftext|>) is produced.
        gen_tokens = model.generate(
            encoded["input_ids"],
            attention_mask=encoded.get("attention_mask"),
            do_sample=True,
            temperature=0.8,
            max_new_tokens=64,
            top_k=50,
            top_p=0.8,
            no_repeat_ngram_size=3,
            repetition_penalty=1.2,
            bad_words_ids=banned_ids,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_id,
        )
    # NOTE: the module-level `pipe` offers an alternative path:
    # pipe(text)[0]['generated_text']
    return tokenizer.batch_decode(gen_tokens)[0]
# Text-in / text-out UI around `predict`. The page title, description and
# footer defined at the top of the file are forwarded here; previously they
# were defined but never shown.
# NOTE(review): `theme` is still not forwarded -- confirm the installed Gradio
# version accepts that theme name before wiring it in.
iface = gr.Interface(
    fn=predict,
    inputs='text',
    outputs='text',
    examples=examples,
    title=title,
    description=description,
    article=article,
)
iface.launch()
# print(generated)  # print: 인간처럼 생각하고, 행동하는 '지능'을 통해 인류가 이제까지 풀지 못했던 문제의 해답을 찾을 수 있을 것이다. 과학기술이 고도로 발달한 21세기를 살아갈 우리 아이들에게 가장 필요한 것은 사고력 훈련이다. 사고력 훈련을 통해, 세상