from unsloth import FastLanguageModel
import torch
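
# Alpaca-style prompt template used to format each question for the model.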
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction: {instruction}

### Input: {input_text}

### Response: {output}
"""
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Subh775/mistral-7b-medical-o1-ft",
    max_seq_length=2048,
    load_in_4bit=True
)
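
# Enable Unsloth's optimized inference mode.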
FastLanguageModel.for_inference(model)
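

# Simple REPL-style loop: each turn is formatted with the Alpaca template,
# appended to the running history, and fed back to the model.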
def chat():
    print("Chat with the model! Type '\\q' or 'quit' to stop.\n")

    chat_history = ""

    while True:
        user_input = input("You: ")

        if user_input.lower() in ['\\q', 'quit']:
            print("\nExiting the chat. Goodbye!")
            break
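
        # Fill the template with the user's question; the response slot is left
        # empty so the model completes it.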
        prompt = alpaca_prompt.format(
            instruction="Please answer the following medical question.",
            input_text=user_input,
            output=""
        )
        chat_history += prompt + "\n"
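
        # Tokenize the full history so earlier turns stay in context; assumes a
        # CUDA-capable GPU is available.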
        inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")
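
        # Sample a response; no_repeat_ngram_size=2 curbs verbatim repetition.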
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            num_return_sequences=1,
            do_sample=True,
            no_repeat_ngram_size=2
        )
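
        # The decoded text contains the whole history, so keep only what follows
        # the final "### Response:" marker.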
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        clean_output = decoded_output[0].split('### Response:')[-1].strip()
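
        # Record the answer in the history so follow-up questions have context.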
        chat_history += f"{clean_output}\n"
        print(f"\nModel: {clean_output}\n")


chat()