Spaces:

kaitwithkwk
/

testing-3.7

Sleeping

testing-3.7 / app.py

Update app.py

f1d9c86 verified 13 days ago

1.44 kB

	# Build on your original chatbot from the previous lesson.
	# A basic chatbot from the previous lesson is below -- edit it to incorporate the changes described above.

	import gradio as gr
	from huggingface_hub import InferenceClient #imports huggingface models

	# Shared inference client; respond() sends every chat request through it.
	client = InferenceClient(model="google/gemma-2-2b-it")

	def respond(message, history):
	    """Stream a chat reply to ``message``, given the earlier conversation.

	    Args:
	        message: Text of the newest user message.
	        history: Earlier turns of the conversation. Each entry is either a
	            ``(user_text, assistant_text)`` pair or a dict with ``"role"``
	            and ``"content"`` keys. May be empty or ``None``.

	    Yields:
	        The reply text accumulated so far, once per streamed chunk, so the
	        caller can display the answer as it grows.
	    """
	    messages = [{"role": "system", "content": "I am a kind chatbot."}]

	    # Replay earlier turns so the model sees the whole conversation.
	    for turn in history or []:
	        if isinstance(turn, dict):
	            # Already in role/content form; forward only those two keys.
	            messages.append(
	                {"role": turn["role"], "content": turn["content"]}
	            )
	            continue
	        user_msg, assistant_msg = turn
	        messages.append({"role": "user", "content": user_msg})
	        messages.append({"role": "assistant", "content": assistant_msg})

	    # The newest user message always goes last.
	    messages.append({"role": "user", "content": message})

	    response = ""

	    # stream=True makes the call return chunks one at a time instead of a
	    # single finished reply.
	    stream = client.chat_completion(
	        messages,
	        max_tokens=500,
	        temperature=.1,
	        stream=True,
	    )

	    # The loop variable is named `chunk` so it does not shadow the
	    # `message` parameter.
	    for chunk in stream:
	        # A chunk can arrive without choices or without text (for example
	        # a closing chunk); appending None to a str would raise TypeError.
	        token = chunk.choices[0].delta.content if chunk.choices else None
	        if token:
	            response += token
	        yield response

	# Wrap respond() in a Gradio chat interface and start serving it.
	chatbot = gr.ChatInterface(fn=respond)
	chatbot.launch()