# build on your original chatbot from the previous lesson
# a basic chatbot from the previous lesson is below -- edit it to incorporate the changes described above

import gradio as gr
from huggingface_hub import InferenceClient  # client for calling models hosted on the Hugging Face Inference API

client = InferenceClient("google/gemma-2-2b-it")
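# note: gemma-2-2b-it is a gated model on the Hugging Face Hub, so this client
# may need authentication, e.g. via `huggingface-cli login` or the HF_TOKEN
# environment variable (both standard huggingface_hub mechanisms)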

def respond(message, history):
    messages = [{"role": "system", "content": "You are a kind chatbot."}]

    # add all previous messages to the messages list
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # add the current user's message to the messages list
    messages.append({"role": "user", "content": message})

    # make the chat completion API call, sending the messages and
    # generation parameters to the model; stream=True implements
    # streaming, where one token appears at a time
    response = ""

    # iterate through each streamed chunk of the response
    # (renamed from `message` to avoid shadowing the user's message above)
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.1,
        stream=True):

        token = chunk.choices[0].delta.content  # capture the most recent token
        if token:  # some chunks (e.g. the final one) may carry no content
            response += token  # add it to the response
            yield response  # yield the partial response so the UI updates live

chatbot = gr.ChatInterface(respond)

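# launch() starts a local web server; passing share=True would also create a
# temporary public link (a standard Gradio option)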
chatbot.launch()