import subprocess
import sys

# Install the dependencies required to load and run the model.
subprocess.check_call([sys.executable, "-m", "pip", "install",
                       "unsloth", "peft", "bitsandbytes", "accelerate", "transformers"])

from unsloth import FastLanguageModel

# Prompt template: the question is substituted into the placeholder and the
# model is expected to continue after "### Answer:".
medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.

### Question:
{}

### Answer:
"""

# Load the model and tokenizer in 4-bit precision to keep GPU memory usage low.
model_name = "Vijayendra/Phi4-MedQA"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    dtype=None,  # let Unsloth pick a suitable dtype for the hardware
    load_in_4bit=True,
    device_map="auto"
)

# Switch the model into Unsloth's optimized inference mode.
FastLanguageModel.for_inference(model)

# Build the prompt and tokenize it (assumes a CUDA GPU is available).
medical_question = "What are the common symptoms of diabetes?"
inputs = tokenizer(
    [medqa_prompt.format(medical_question)],
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=1024
).to("cuda")

# Generate the answer.
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    use_cache=True
)
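
# Optional: a sketch of sampled generation with streamed output. The sampling
# values (temperature, top_p) are illustrative assumptions, not settings from
# the original snippet; TextStreamer prints tokens to stdout as they are generated.
def generate_streamed(prompt_inputs, max_new_tokens=512):
    from transformers import TextStreamer
    streamer = TextStreamer(tokenizer, skip_prompt=True)
    return model.generate(
        **prompt_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        streamer=streamer,
        use_cache=True
    )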

# Decode the output and keep only the text after the "### Answer:" marker.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
answer_text = response.split("### Answer:")[1].strip() if "### Answer:" in response else response.strip()

print(f"Question: {medical_question}") |
|
print(f"Answer: {answer_text}") |
|
|
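
# Optional: the same flow wrapped in a small helper so several questions can be
# asked in a loop. A convenience sketch, not part of the original snippet.
def ask_medical_question(question, max_new_tokens=512):
    prompt = medqa_prompt.format(question)
    batch = tokenizer([prompt], return_tensors="pt", padding=True,
                      truncation=True, max_length=1024).to("cuda")
    generated = model.generate(**batch, max_new_tokens=max_new_tokens, use_cache=True)
    text = tokenizer.decode(generated[0], skip_special_tokens=True)
    return text.split("### Answer:")[1].strip() if "### Answer:" in text else text.strip()

# Example usage (hypothetical question):
# print(ask_medical_question("What are the first-line treatments for hypertension?"))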