# Use this script to chat with "mistral-7b-medical-o1-ft"; the model keeps answering your questions until you type '\q' or 'quit' to end the conversation.
# !pip install unsloth  # install unsloth first if it is not already installed
from unsloth import FastLanguageModel
import torch
# Define the Alpaca prompt template
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction: {instruction}
### Input: {input_text}
### Response: {output}
"""
# Load the fine-tuned model and its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Subh775/mistral-7b-medical-o1-ft",
    max_seq_length=2048,   # maximum context length in tokens
    load_in_4bit=True      # 4-bit quantization to reduce GPU memory use
)
# Enable optimized inference mode for faster generation
FastLanguageModel.for_inference(model)
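# Optional guard (not part of the original script): the chat loop below moves
# inputs to "cuda", so it is worth failing early if no GPU is visible.
if not torch.cuda.is_available():
    raise RuntimeError("This script moves inputs to 'cuda'; run it on a CUDA-capable GPU.")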
# Function to handle the chat loop with memory
def chat():
print("Chat with the model! Type '\\q' or 'quit' to stop.\n")
chat_history = "" # Store the conversation history
while True:
# Get user input
user_input = input("You: ")
# Exit condition
if user_input.lower() in ['\\q', 'quit']:
print("\nExiting the chat. Goodbye!")
break
        # Format the current input with the Alpaca template and append it to the chat history
        prompt = alpaca_prompt.format(
            instruction="Please answer the following medical question.",
            input_text=user_input,
            output=""
        )
        chat_history += prompt + "\n"

        # Tokenize the combined history and move it to the GPU
        inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")
        # Generate output with the configured sampling parameters
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,       # cap on tokens generated per turn
            temperature=0.7,          # sampling temperature (lower = more deterministic)
            top_p=0.9,                # nucleus sampling cutoff
            num_return_sequences=1,
            do_sample=True,
            no_repeat_ngram_size=2    # disallow repeating any 2-gram
        )
        # Decode the output and keep only the text after the last "### Response:" marker
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        clean_output = decoded_output[0].split('### Response:')[-1].strip()

        # Append the model's response to the chat history so later turns keep context
        chat_history += f"{clean_output}\n"

        # Display the response
        print(f"\nModel: {clean_output}\n")
# Start the chat
chat()