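"""Gradio chat interface for the Apexion AI Nous-1 models (ZeroGPU Space)."""
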
import gradio as gr
import spaces
from transformers import pipeline
import torch

# Module-level cache of loaded pipelines, keyed by model id
model_cache = {}

# Available models
AVAILABLE_MODELS = {
    "Nous-1-2B": "apexion-ai/Nous-1-2B",
    "Nous-1-4B": "apexion-ai/Nous-1-4B",
    "Nous-1-8B": "apexion-ai/Nous-1-8B",
}

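# Note: on ZeroGPU Spaces, @spaces.GPU borrows a GPU only for the duration of
# the decorated call; when not running on ZeroGPU the decorator is a no-op.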
@spaces.GPU
def initialize_model(model_name):
    global model_cache
    if model_name not in AVAILABLE_MODELS:
        raise ValueError(f"Model {model_name} not found in available models")
    model_id = AVAILABLE_MODELS[model_name]
    # Check if the model is already cached
    if model_id not in model_cache:
        try:
            model_cache[model_id] = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
        except Exception:
            # Fall back to CPU if GPU loading fails
            model_cache[model_id] = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True
            )
    return model_cache[model_id]

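# `history` arrives in Gradio's tuples format: a list of
# [user_message, assistant_message] pairs (None while a reply is pending).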
@spaces.GPU
def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a response using the selected model."""
    # Initialize the model inside the GPU-decorated function
    try:
        model_pipe = initialize_model(model_name)
    except Exception as e:
        return f"Error loading model {model_name}: {str(e)}"

    # Format the conversation history
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

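    # Note: `max_length` caps prompt plus generated tokens together; if replies
    # come back truncated, `max_new_tokens` is usually the better knob.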
    # Generate the response
    try:
        # Some models may not support the messages format, so try the chat
        # format first and fall back to a plain-text prompt.
        try:
            response = model_pipe(
                messages,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=model_pipe.tokenizer.eos_token_id,
                return_full_text=False
            )
        except Exception:
            # Fallback: flatten the conversation into a plain-text prompt
            conversation_text = ""
            for msg in messages:
                if msg["role"] == "user":
                    conversation_text += f"User: {msg['content']}\n"
                else:
                    conversation_text += f"Assistant: {msg['content']}\n"
            conversation_text += "Assistant:"
            response = model_pipe(
                conversation_text,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=model_pipe.tokenizer.eos_token_id,
                return_full_text=False
            )

        # Extract the generated text
        if isinstance(response, list) and len(response) > 0:
            generated_text = response[0]['generated_text']
        else:
            generated_text = str(response)

        # Clean up the response
        if isinstance(generated_text, list):
            # Chat format: the last message is the assistant's reply
            assistant_response = generated_text[-1]['content']
        else:
            # Plain-text format: strip the prompt, keep the assistant's reply
            assistant_response = str(generated_text).strip()
            if "Assistant:" in assistant_response:
                assistant_response = assistant_response.split("Assistant:")[-1].strip()

        return assistant_response
    except Exception as e:
        return f"Error generating response: {str(e)}"


# Create the Gradio interface
def create_interface():
    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🚀 Nous-1 Model Chat Interface

        Chat with the Nous-1 models by Apexion AI.

        **Available Models:**
        - Nous-1-2B (2 billion parameters)
        - Nous-1-4B (4 billion parameters)
        - Nous-1-8B (8 billion parameters)
        """)

        with gr.Row():
            model_selector = gr.Dropdown(
                choices=list(AVAILABLE_MODELS.keys()),
                value="Nous-1-4B",
                label="Select Model",
                info="Choose which model to use for generation"
            )

        chatbot = gr.Chatbot(
            height=400,
            placeholder="Select a model and start chatting...",
            label="Chat"
        )

        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=200,
                maximum=8192,
                value=2048,
                step=50,
                label="Max Length",
                info="Maximum length in tokens (prompt + generated response)"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Controls randomness in generation"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P",
                info="Controls diversity via nucleus sampling"
            )

        # Event handlers
        def user_message(message, history):
            return "", history + [[message, None]]

        def bot_response(history, model_name, max_len, temp, top_p):
            if history:
                last_user_message = history[-1][0]
                bot_message = generate_response(
                    last_user_message,
                    history[:-1],
                    model_name,
                    max_len,
                    temp,
                    top_p
                )
                history[-1][1] = bot_message
            return history

        def model_changed(model_name):
            return gr.update(placeholder=f"Chat with {model_name}...")

        # Wire up the events
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )
        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )
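        # queue=False lets the clear action run immediately instead of
        # waiting behind queued generation requests.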
        clear_btn.click(lambda: None, None, chatbot, queue=False)
        model_selector.change(model_changed, model_selector, chatbot)

        gr.Markdown("""
        ---
        ### About the Nous-1 Models

        **Nous-1-2B**: 2 billion parameter model by Apexion AI, designed for fast inference

        **Nous-1-4B**: 4 billion parameter model by Apexion AI, optimised for efficient conversation and text generation

        **Nous-1-8B**: 8 billion parameter model by Apexion AI, offering enhanced capabilities and better performance on complex tasks

        All models are designed for conversational AI and support various text generation tasks. The 8B model provides more sophisticated responses but requires more computational resources.

        This Space uses ZeroGPU for efficient GPU allocation across all model sizes.
        """)

    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
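    # Optional (untested here): explicit queuing can help under load, e.g.
    # demo.queue(max_size=20).launch()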