# build on the basic chatbot from the previous lesson, shown below --
# edit it to incorporate the changes described above
import gradio as gr
from huggingface_hub import InferenceClient  # client for the Hugging Face Inference API
client = InferenceClient("google/gemma-2-2b-it")
def respond(message, history):
    # start with a system prompt that sets the chatbot's persona
    messages = [{"role": "system", "content": "I am a kind chatbot."}]
    # add all previous turns of the conversation to the messages list
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
    # add the current user's message to the messages list
    messages.append({"role": "user", "content": message})
    # make the chat completion API call, sending the messages and
    # sampling parameters to the model; stream=True enables streaming,
    # so the reply appears one token at a time
    response = ""
    # iterate over the streamed chunks returned by the API
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.1,
        stream=True,
    ):
        token = chunk.choices[0].delta.content or ""  # capture the most recent token (may be None on the final chunk)
        response += token  # add it to the response so far
        yield response  # yield the partial response so the UI updates live
chatbot = gr.ChatInterface(respond)
chatbot.launch()