# A basic streaming chatbot: Gradio UI backed by a Hugging Face hosted model.
import gradio as gr
from huggingface_hub import InferenceClient  # client for the HF Inference API

# Hosted model used for chat completions.
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct-1M")


def respond(message, history):
    """Generate a streamed assistant reply for the Gradio ChatInterface.

    Args:
        message: The user's current message (str).
        history: Prior turns as (user_msg, assistant_msg) pairs, as supplied
            by gr.ChatInterface; may be None/empty on the first turn.

    Yields:
        The partial response text, growing one token at a time, so the UI
        renders the reply incrementally.
    """
    messages = [{"role": "system", "content": "I am a kind chatbot."}]

    # Replay the prior conversation so the model has full context.
    if history:
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user's message last.
    messages.append({"role": "user", "content": message})

    # Stream the chat completion: each iteration yields one delta chunk.
    # NOTE: the loop variable is named `chunk` (not `message`) so it does not
    # shadow the `message` parameter above.
    response = ""
    for chunk in client.chat_completion(
            messages,
            max_tokens=500,
            temperature=0.1,
            stream=True):
        # capture the most recent token; delta.content can be None on some
        # chunks (e.g. the final stop chunk), so guard before concatenating
        token = chunk.choices[0].delta.content
        if token:
            response += token
        yield response  # yield the accumulated partial response


# Launch the UI only when run as a script, not when imported as a module.
if __name__ == "__main__":
    chatbot = gr.ChatInterface(respond)
    chatbot.launch()