# A basic streaming chatbot: Gradio UI backed by a Hugging Face hosted model.
import gradio as gr
from huggingface_hub import InferenceClient  # client for the HF Inference API

# Hosted model used for chat completions.
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct-1M")


def respond(message, history):
    """Generate a streamed assistant reply for the Gradio ChatInterface.

    Args:
        message: The user's current message (str).
        history: Prior turns as (user_msg, assistant_msg) pairs, as supplied
            by gr.ChatInterface; may be None/empty on the first turn.

    Yields:
        The partial response text, growing one token at a time, so the UI
        renders the reply incrementally.
    """
    messages = [{"role": "system", "content": "I am a kind chatbot."}]

    # Replay the prior conversation so the model has full context.
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user's message last.
    messages.append({"role": "user", "content": message})

    # Stream the chat completion: each iteration yields one delta chunk.
    # NOTE: the loop variable is named `chunk` (not `message`) so it does not
    # shadow the `message` parameter above.
    response = ""
    for chunk in client.chat_completion(
            messages,
            max_tokens=500,
            temperature=0.1,
            stream=True):
        # capture the most recent token; delta.content can be None on some
        # chunks (e.g. the final stop chunk), so guard before concatenating
        token = chunk.choices[0].delta.content
        if token:
            response += token
        yield response  # yield the accumulated partial response


# Launch the UI only when run as a script, not when imported as a module.
if __name__ == "__main__":
    chatbot = gr.ChatInterface(respond)
    chatbot.launch()