Spaces:

alfredplpl
/

llm-jp-instruct-v2

Paused

File size: 5,099 Bytes

abc65fe
 
 
 
 
b19740e
 
 
abc65fe
 
 
 
 
55ffee0
377528c
abc65fe
 
 
 
 
 
 
 
 
 
55ffee0
abc65fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f19948a
abc65fe
50cb5f9
26f08ee
abc65fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55ffee0
abc65fe
 
 
 
590e6c9
7572cf2
4fffa9e
 
e2f46ea
 
 
44c960c
 
be71825
e2f46ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abc65fe
 
 
 
 
 
 
 
 
 
26f08ee
abc65fe
 
 
 
b86cb8e
abc65fe
b86cb8e
8aefcce
abc65fe
 
 
 
 
b86cb8e
abc65fe

# Ref: https://huggingface.co/spaces/ysharma/Chat_with_Meta_llama3_8b

import os
from collections.abc import Iterator
from threading import Thread

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import TextIteratorStreamer


# HTML shown at the top of the page (passed to gr.Markdown): the demo title
# and a link to the model repository on the Hugging Face Hub.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">LLM-jp v2</h1>
<p>LLM-jp v2 の非公式デモだよ。 <a href="https://huggingface.co/llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0"><b>llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0</b></a>.</p>
</div>
'''

# Markup shown below the chat (passed to gr.Markdown); currently only an
# empty paragraph, i.e. no license text is displayed.
LICENSE = """
<p/>

"""

# Markup passed as the `placeholder` of the gr.Chatbot component.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">LLM-jp v2</h1>
   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">なんでもきいてね</p>
</div>
"""


# Custom stylesheet passed to gr.Blocks(css=...): centers <h1> headings and
# styles the element whose id is "duplicate-button" (the DuplicateButton).
css = """
h1 {
  text-align: center;
  display: block;
}

#duplicate-button {
  margin: auto;
  color: white;
  background: #1565c0;
  border-radius: 100vh;
}
"""

# Hub repository that provides both the tokenizer and the model weights.
MODEL_ID = "llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0"

# Both objects are created once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

def _build_conversation(message: str, history: list) -> list:
    """Assemble the role/content message list handed to the chat template.

    Args:
        message (str): The new user message.
        history (list): Earlier turns, either ``(user, assistant)`` pairs or
            dicts carrying ``"role"`` and ``"content"`` keys.
    Returns:
        list: The system prompt, the earlier turns in order, then ``message``
        as the final user turn.
    """
    conversation = [
        {"role": "system", "content": "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。"},
    ]
    for turn in history:
        if isinstance(turn, dict):
            # Already a role/content dict; copy only the keys the template
            # needs so extra keys never reach it.
            conversation.append({"role": turn["role"], "content": turn["content"]})
        else:
            user, assistant = turn
            conversation.append({"role": "user", "content": user})
            conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})
    return conversation


@spaces.GPU
def chat_llm_jp_v2(message: str,
                   history: list,
                   temperature: float,
                   max_new_tokens: int
                   ) -> Iterator[str]:
    """
    Stream a reply from the LLM-jp v2 model loaded at module level.

    Generation runs in a background thread; this generator yields the reply
    accumulated so far each time the streamer delivers a new piece of text.

    Args:
        message (str): The input message.
        history (list): The conversation history used by ChatInterface.
        temperature (float): Sampling temperature. A value of 0 (or below)
            switches to greedy decoding.
        max_new_tokens (int): The maximum number of new tokens to generate.
    Yields:
        str: The response text generated so far (cumulative, not a delta).
    """
    conversation = _build_conversation(message, history)

    # add_generation_prompt=True makes the template finish the prompt with the
    # assistant header, so the model answers instead of continuing the user turn.
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.1,
    )
    if temperature > 0:
        generate_kwargs.update(do_sample=True, temperature=temperature, top_p=0.95)
    else:
        # Greedy decoding: a zero temperature is not a valid sampling setting,
        # so the sampling-only arguments are left out entirely.
        generate_kwargs["do_sample"] = False

    # generate() blocks until it is done, so it runs in a worker thread while
    # this generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

    # The streamer is exhausted, so generation has finished; reap the thread.
    worker.join()
        

# Gradio block
# The chat display is built before (and outside) the Blocks context and then
# handed to ChatInterface.
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')

with gr.Blocks(fill_height=True, css=css) as demo:

    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")

    # Extra controls are created with render=False and passed to ChatInterface
    # below instead of being placed in the layout at this point.
    parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
    temperature_slider = gr.Slider(
        minimum=0.0,
        maximum=1,
        step=0.1,
        value=0.7,
        label="Temperature",
        render=False,
    )
    max_new_tokens_slider = gr.Slider(
        minimum=128,
        maximum=4096,
        step=1,
        value=512,
        label="Max new tokens",
        render=False,
    )

    # Example prompts offered in the UI; each inner list is one example.
    example_prompts = [
        ['小学生にもわかるように相対性理論を教えてください。'],
        ['宇宙の起源を知るための方法をステップ・バイ・ステップで教えてください。'],
        ['1から100までの素数を求めるスクリプトをPythonで書いてください。'],
        ['友達の陽葵にあげる誕生日プレゼントを考えてください。ただし、陽葵は中学生で、私は同じクラスの男性であることを考慮してください。'],
        ['ペンギンがジャングルの王様であることを正当化するように説明してください。'],
    ]

    # The slider values are passed to chat_llm_jp_v2 after (message, history),
    # in the order listed in additional_inputs.
    gr.ChatInterface(
        fn=chat_llm_jp_v2,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        additional_inputs=[temperature_slider, max_new_tokens_slider],
        examples=example_prompts,
        cache_examples=False,
    )

    gr.Markdown(LICENSE)

if __name__ == "__main__":
    demo.launch()