import gradio as gr
import spaces
import torch
from transformers import pipeline

# Cache of loaded pipelines, keyed by model id, so each checkpoint is only
# loaded once per process.
model_cache = {}

# Display name -> Hugging Face Hub model id.
AVAILABLE_MODELS = {
    "Nous-1-4B": "apexion-ai/Nous-1-4B",
    "Nous-1-8B": "apexion-ai/Nous-1-8B",
    "Nous-1-2B": "apexion-ai/Nous-1-2B",
}
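
# To expose another checkpoint, add an entry to AVAILABLE_MODELS, e.g.
# (hypothetical id):
#     "My-Model-1B": "my-org/My-Model-1B",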


@spaces.GPU
def initialize_model(model_name):
    """Load the pipeline for `model_name`, reusing a cached instance if present."""
    global model_cache

    if model_name not in AVAILABLE_MODELS:
        raise ValueError(f"Model {model_name} not found in available models")

    model_id = AVAILABLE_MODELS[model_name]

    if model_id not in model_cache:
        try:
            # Prefer fp16 with automatic device placement (GPU when available).
            model_cache[model_id] = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
        except Exception:
            # Fall back to fp32 on CPU if GPU loading fails.
            model_cache[model_id] = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True
            )

    return model_cache[model_id]
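
# Usage sketch: repeated calls with the same name are cheap after the first one.
#     pipe = initialize_model("Nous-1-4B")  # downloads/loads, then caches
#     pipe = initialize_model("Nous-1-4B")  # returns the cached instance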


@spaces.GPU
def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a response to `message` using the selected model."""
    try:
        model_pipe = initialize_model(model_name)
    except Exception as e:
        return f"Error loading model {model_name}: {str(e)}"

    # Rebuild the conversation in the chat-template message format.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
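
    # Resulting `messages` shape, e.g.:
    #     [{"role": "user", "content": "Hi"},
    #      {"role": "assistant", "content": "Hello!"},
    #      {"role": "user", "content": "What's new?"}]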

    try:
        try:
            # Preferred path: pass the chat messages directly; the pipeline
            # applies the model's chat template.
            response = model_pipe(
                messages,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=model_pipe.tokenizer.eos_token_id,
                return_full_text=False
            )
        except Exception:
            # Fallback: flatten the conversation into a plain prompt string for
            # pipelines without chat-template support.
            conversation_text = ""
            for msg in messages:
                if msg["role"] == "user":
                    conversation_text += f"User: {msg['content']}\n"
                else:
                    conversation_text += f"Assistant: {msg['content']}\n"
            conversation_text += "Assistant:"

            response = model_pipe(
                conversation_text,
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=model_pipe.tokenizer.eos_token_id,
                return_full_text=False
            )

        # The pipeline returns a list of dicts with a 'generated_text' field.
        if isinstance(response, list) and len(response) > 0:
            generated_text = response[0]['generated_text']
        else:
            generated_text = str(response)

        # With chat input, 'generated_text' is the message list; take the final
        # assistant turn. Otherwise it is a plain string.
        if isinstance(generated_text, list):
            assistant_response = generated_text[-1]['content']
        else:
            assistant_response = str(generated_text).strip()
            if "Assistant:" in assistant_response:
                assistant_response = assistant_response.split("Assistant:")[-1].strip()

        return assistant_response

    except Exception as e:
        return f"Error generating response: {str(e)}"
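
# Quick sanity check (sketch; for manual testing, not run on import):
#     print(generate_response("Hello!", [], "Nous-1-4B", max_length=64))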


def create_interface():
    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Nous-1 Model Chat Interface

        Chat with the Nous-1 models by Apexion AI.

        **Available Models:**
        - Nous-1-2B (2 billion parameters)
        - Nous-1-4B (4 billion parameters)
        - Nous-1-8B (8 billion parameters)
        """)

        with gr.Row():
            model_selector = gr.Dropdown(
                choices=list(AVAILABLE_MODELS.keys()),
                value="Nous-1-4B",
                label="Select Model",
                info="Choose which model to use for generation"
            )

        chatbot = gr.Chatbot(
            height=400,
            placeholder="Select a model and start chatting...",
            label="Chat"
        )

        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=200,
                maximum=8192,
                value=2048,
                step=50,
                label="Max Length",
                info="Maximum number of new tokens to generate"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Controls randomness in generation"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P",
                info="Controls diversity via nucleus sampling"
            )
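
        # Rough guide (assumption, model-dependent): temperature below 1.0
        # sharpens the token distribution, above 1.0 flattens it; top_p keeps
        # the smallest set of tokens whose cumulative probability exceeds p.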

        def user_message(message, history):
            # Record the user's turn and clear the textbox.
            return "", history + [[message, None]]

        def bot_response(history, model_name, max_len, temp, top_p):
            # Fill in the assistant reply for the most recent user turn.
            if history:
                user_msg = history[-1][0]
                bot_message = generate_response(
                    user_msg,
                    history[:-1],
                    model_name,
                    max_len,
                    temp,
                    top_p
                )
                history[-1][1] = bot_message
            return history

        def model_changed(model_name):
            return gr.update(placeholder=f"Chat with {model_name}...")

        # Enter and the Send button behave identically: record the user turn,
        # then generate the reply.
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )

        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )

        clear_btn.click(lambda: None, None, chatbot, queue=False)

        model_selector.change(model_changed, model_selector, chatbot)

        gr.Markdown("""
        ---

        ### About the Nous-1 Models

        **Nous-1-2B**: 2 billion parameter model by Apexion AI, designed for fast inference

        **Nous-1-4B**: 4 billion parameter model by Apexion AI, optimised for efficient conversation and text generation

        **Nous-1-8B**: 8 billion parameter model by Apexion AI, offering enhanced capabilities and better performance on complex tasks

        All models are designed for conversational AI and support various text generation tasks. The 8B model provides more sophisticated responses but requires more computational resources.

        This Space uses ZeroGPU for efficient GPU allocation across all model sizes.
        """)

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
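
# For local debugging outside Spaces, a shareable link can be enabled (sketch):
#     demo.launch(share=True)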