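"""Gradio chat interface for the Apexion AI Nous-1 models (ZeroGPU Space)."""
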
import gradio as gr
import spaces
from transformers import pipeline
import torch

# Module-level cache of loaded pipelines, keyed by model id
model_cache = {}

# Available models
AVAILABLE_MODELS = {
    "Nous-1-2B": "apexion-ai/Nous-1-2B",
    "Nous-1-4B": "apexion-ai/Nous-1-4B",
    "Nous-1-8B": "apexion-ai/Nous-1-8B",
}

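# Note: on ZeroGPU Spaces, @spaces.GPU borrows a GPU only for the duration of
# the decorated call; when not running on ZeroGPU the decorator is a no-op.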
@spaces.GPU
def initialize_model(model_name):
    global model_cache
    if model_name not in AVAILABLE_MODELS:
        raise ValueError(f"Model {model_name} not found in available models")
    model_id = AVAILABLE_MODELS[model_name]
    # Check if the model is already cached
    if model_id not in model_cache:
        try:
            model_cache[model_id] = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
        except Exception:
            # Fall back to CPU if GPU loading fails
            model_cache[model_id] = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True
            )
    return model_cache[model_id]

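# `history` arrives in Gradio's tuples format: a list of
# [user_message, assistant_message] pairs (None while a reply is pending).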
@spaces.GPU
def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a response using the selected model."""
    # Initialize the model inside the GPU-decorated function
    try:
        model_pipe = initialize_model(model_name)
    except Exception as e:
        return f"Error loading model {model_name}: {str(e)}"

    # Format the conversation history
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

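    # Note: `max_length` caps prompt plus generated tokens together; if replies
    # come back truncated, `max_new_tokens` is usually the better knob.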
    # Generate the response
    try:
        # Some models may not support the messages format, so try the chat
        # format first and fall back to a plain-text prompt.
        try:
            response = model_pipe(
                messages,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=model_pipe.tokenizer.eos_token_id,
                return_full_text=False
            )
        except Exception:
            # Fallback: flatten the conversation into a plain-text prompt
            conversation_text = ""
            for msg in messages:
                if msg["role"] == "user":
                    conversation_text += f"User: {msg['content']}\n"
                else:
                    conversation_text += f"Assistant: {msg['content']}\n"
            conversation_text += "Assistant:"
            response = model_pipe(
                conversation_text,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=model_pipe.tokenizer.eos_token_id,
                return_full_text=False
            )

        # Extract the generated text
        if isinstance(response, list) and len(response) > 0:
            generated_text = response[0]['generated_text']
        else:
            generated_text = str(response)

        # Clean up the response
        if isinstance(generated_text, list):
            # Chat format: the last message is the assistant's reply
            assistant_response = generated_text[-1]['content']
        else:
            # Plain-text format: strip the prompt, keep the assistant's reply
            assistant_response = str(generated_text).strip()
            if "Assistant:" in assistant_response:
                assistant_response = assistant_response.split("Assistant:")[-1].strip()

        return assistant_response
    except Exception as e:
        return f"Error generating response: {str(e)}"


# Create the Gradio interface
def create_interface():
    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🚀 Nous-1 Model Chat Interface

        Chat with the Nous-1 models by Apexion AI.

        **Available Models:**
        - Nous-1-2B (2 billion parameters)
        - Nous-1-4B (4 billion parameters)
        - Nous-1-8B (8 billion parameters)
        """)

        with gr.Row():
            model_selector = gr.Dropdown(
                choices=list(AVAILABLE_MODELS.keys()),
                value="Nous-1-4B",
                label="Select Model",
                info="Choose which model to use for generation"
            )

        chatbot = gr.Chatbot(
            height=400,
            placeholder="Select a model and start chatting...",
            label="Chat"
        )

        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=200,
                maximum=8192,
                value=2048,
                step=50,
                label="Max Length",
                info="Maximum length in tokens (prompt + generated response)"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Controls randomness in generation"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P",
                info="Controls diversity via nucleus sampling"
            )

        # Event handlers
        def user_message(message, history):
            return "", history + [[message, None]]

        def bot_response(history, model_name, max_len, temp, top_p):
            if history:
                last_user_message = history[-1][0]
                bot_message = generate_response(
                    last_user_message,
                    history[:-1],
                    model_name,
                    max_len,
                    temp,
                    top_p
                )
                history[-1][1] = bot_message
            return history

        def model_changed(model_name):
            return gr.update(placeholder=f"Chat with {model_name}...")

        # Wire up the events
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )
        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )
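        # queue=False lets the clear action run immediately instead of
        # waiting behind queued generation requests.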
        clear_btn.click(lambda: None, None, chatbot, queue=False)
        model_selector.change(model_changed, model_selector, chatbot)

        gr.Markdown("""
        ---
        ### About the Nous-1 Models

        **Nous-1-2B**: 2 billion parameter model by Apexion AI, designed for fast inference

        **Nous-1-4B**: 4 billion parameter model by Apexion AI, optimised for efficient conversation and text generation

        **Nous-1-8B**: 8 billion parameter model by Apexion AI, offering enhanced capabilities and better performance on complex tasks

        All models are designed for conversational AI and support various text generation tasks. The 8B model provides more sophisticated responses but requires more computational resources.

        This Space uses ZeroGPU for efficient GPU allocation across all model sizes.
        """)

    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
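    # Optional (untested here): explicit queuing can help under load, e.g.
    # demo.queue(max_size=20).launch()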