Introduction

Developed for participation in the [2025] Korean Language Norm-Based Generation (RAG) Competition (Type ๊ฐ€).

  • Developed by: ์ตœ๊ฐ•์ธ๊ณต์ง€๋ŠฅํŒ€

Kanana-1.5-8B Instruct LoRA SFT

Kakao์˜ Kanana-1.5-8B Instruct ๋ชจ๋ธ์„ LoRA ๋ฐฉ์‹์œผ๋กœ SFT ํŒŒ์ธํŠœ๋‹ํ•œ ํ•œ๊ตญ์–ด instruction following ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค.

Usage

from unsloth import FastLanguageModel
import torch

def load_model_with_adapter(base_model_id, adapter_path, max_seq_length=4096):
    """๋ฒ ์ด์Šค ๋ชจ๋ธ ๋กœ๋“œ ํ›„ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ"""
    
    # ๋ฒ ์ด์Šค ๋ชจ๋ธ ๋กœ๋“œ
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=base_model_id,
        max_seq_length=max_seq_length,
        dtype=torch.float16,
        load_in_4bit=False,
        load_in_8bit=False,
        trust_remote_code=True
    )
    
    # Set the padding/unknown tokens (fall back to EOS when unset)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    if tokenizer.unk_token is None:
        tokenizer.unk_token = tokenizer.eos_token
    
    # Switch the model to inference mode
    model = FastLanguageModel.for_inference(model)
    
    # Load the LoRA adapter weights
    model.load_adapter(adapter_path)
    
    return model, tokenizer

def make_chat(inp):
    """์ž…๋ ฅ ๋ฐ์ดํ„ฐ๋ฅผ ์ฑ„ํŒ… ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜"""
    question_type_instructions = {
        "์„ ๋‹คํ˜•": (
            "[์งˆ๋ฌธ]์„ ์ž˜ ์ฝ๊ณ  ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜์‹œ์˜ค. ๋ฌธ์ œ๋ฅผ ๊ทธ๋Œ€๋กœ ์ถœ๋ ฅํ•˜์ง€ ๋งˆ์‹œ์˜ค. "
            "[์ง€์นจ] ์ฃผ์–ด์ง„ ๋ณด๊ธฐ ์ค‘์—์„œ ๊ฐ€์žฅ ์ ์ ˆํ•œ ๋‹ต์„ ์ˆซ์ž๋กœ๋งŒ ์‘๋‹ตํ•˜์‹œ์˜ค."
        ),
        "์„œ์ˆ ํ˜•": (
            "[์งˆ๋ฌธ]์„ ์ž˜ ์ฝ๊ณ  ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜์‹œ์˜ค. ๋ฌธ์ œ๋ฅผ ๊ทธ๋Œ€๋กœ ์ถœ๋ ฅํ•˜์ง€ ๋งˆ์‹œ์˜ค. "
            "[์ง€์นจ] ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์™„์„ฑ๋œ ๋ฌธ์žฅ์œผ๋กœ ์„œ์ˆ ํ•˜์‹œ์˜ค."
        ),
        "๋‹จ๋‹ตํ˜•": (
            "[์งˆ๋ฌธ]์„ ์ž˜ ์ฝ๊ณ  ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜์‹œ์˜ค. ๋ฌธ์ œ๋ฅผ ๊ทธ๋Œ€๋กœ ์ถœ๋ ฅํ•˜์ง€ ๋งˆ์‹œ์˜ค. "
            "[์ง€์นจ] ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต์„ 2๋‹จ์–ด ์ด๋‚ด๋กœ ๊ฐ„๋‹จํžˆ ๋‹ตํ•˜์‹œ์˜ค."
        ),
        "๊ต์ •ํ˜•": (
            "[์งˆ๋ฌธ]์„ ์ž˜ ์ฝ๊ณ  ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜์‹œ์˜ค. ๋ฌธ์ œ๋ฅผ ๊ทธ๋Œ€๋กœ ์ถœ๋ ฅํ•˜์ง€ ๋งˆ์‹œ์˜ค. "
            "[์ง€์นจ] ์ฃผ์–ด์ง„ ๋ฌธ์žฅ์ด ์˜ฌ๋ฐ”๋ฅธ์ง€ ํŒ๋‹จํ•˜๊ณ , ํ‹€๋ฆฐ ๊ฒฝ์šฐ ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ๊ต์ •ํ•˜์—ฌ \"~๊ฐ€ ์˜ณ๋‹ค.\" ํ˜•ํƒœ๋กœ ๋‹ต๋ณ€ํ•˜๊ณ , ๊ทธ ์ด์œ ๋ฅผ ์„ค๋ช…ํ•˜์‹œ์˜ค."
        ),
        "์„ ํƒํ˜•": (
            "[์งˆ๋ฌธ]์„ ์ž˜ ์ฝ๊ณ  ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜์‹œ์˜ค. ๋ฌธ์ œ๋ฅผ ๊ทธ๋Œ€๋กœ ์ถœ๋ ฅํ•˜์ง€ ๋งˆ์‹œ์˜ค. "
            "[์ง€์นจ] ์ฃผ์–ด์ง„ ๋ณด๊ธฐ๋“ค ์ค‘์—์„œ ๊ฐ€์žฅ ์ ์ ˆํ•œ ๊ฒƒ์„ ์„ ํƒํ•˜์—ฌ \"~๊ฐ€ ์˜ณ๋‹ค.\" ํ˜•ํƒœ๋กœ ๋‹ต๋ณ€ํ•˜๊ณ , ๊ทธ ์ด์œ ๋ฅผ ์„ค๋ช…ํ•˜์‹œ์˜ค."
        )
    }
    
    # Look up the instruction for this question type
    instruction = question_type_instructions.get(inp.get('question_type', ''), "")
    
    # Append the retrieved RAG context, if present
    if 'retrieved_context' in inp and inp['retrieved_context']:
        instruction += f" [๊ด€๋ จ ์ •๋ณด] {inp['retrieved_context']}"
    
    # Collect the remaining fields (everything except question and retrieved_context)
    other_info = {k: v for k, v in inp.items() if k not in ['question', 'retrieved_context']}
    
    chat_parts = [instruction]
    if other_info:
        info_list = ["[๊ธฐํƒ€ ์ •๋ณด]"]
        for key, value in other_info.items():
            if value is not None and value != "":
                info_list.append(f" {key}: {value}")
        chat_parts.append(" ".join(info_list))
    
    # Append the question itself
    chat_parts.append(f"[์งˆ๋ฌธ] {inp['question']}")
    
    return " ".join(chat_parts)

# Usage example
base_model_id = "kakaocorp/kanana-1.5-8b-instruct-2505"
adapter_path = "demoap3909/kanana-1.5-8b-instruct-2505-lora-kli-sft-25"

model, tokenizer = load_model_with_adapter(base_model_id, adapter_path)

# ๋ฐ์ดํ„ฐ ์˜ˆ์‹œ
sample_data = {
    "id": "623",
    "question_type": "์„ ํƒํ˜•",
    "question": "\"๋‚˜๋Š” ๊ทธ๋ฅผ ๋ณธ ์ ์ด ์žˆ์Œ์„ {๊ธฐ์–ตํ•ด๋ƒˆ๋‹ค/๊ธฐ์–ตํ•ด ๋ƒˆ๋‹ค}.\" ๊ฐ€์šด๋ฐ ์˜ฌ๋ฐ”๋ฅธ ๊ฒƒ์„ ์„ ํƒํ•˜๊ณ , ๊ทธ ์ด์œ ๋ฅผ ์„ค๋ช…ํ•˜์„ธ์š”.",
    "retrieved_context": "<๋„์–ด์“ฐ๊ธฐ - ํ•œ๊ธ€ ๋งž์ถค๋ฒ• ์ œ42ํ•ญ> ์˜์กด ๋ช…์‚ฌ๋Š” ๋„์–ด ์“ด๋‹ค. ์•„๋Š” ๊ฒƒ์ด ํž˜์ด๋‹ค., ๋‚˜๋„ ํ•  ์ˆ˜ ์žˆ๋‹ค., ๋จน์„ ๋งŒํผ ๋จน์–ด๋ผ., ์•„๋Š” ์ด๋ฅผ ๋งŒ๋‚ฌ๋‹ค., ๋„ค๊ฐ€ ๋œปํ•œ ๋ฐ”๋ฅผ ์•Œ๊ฒ ๋‹ค., ๊ทธ๊ฐ€ ๋– ๋‚œ ์ง€๊ฐ€ ์˜ค๋ž˜๋‹ค. <ํ•œ๊ธ€ ๋งž์ถค๋ฒ•, ํ‘œ์ค€์–ด ๊ทœ์ • - ํ•œ๊ธ€ ๋งž์ถค๋ฒ• ์ œ57ํ•ญ> ์žˆ๊ฑฐ๋ผ. ๋Š” ์ด๋ณด๋‹ค์˜์กด ๋ช…์‚ฌ ์˜ค๋Š” ์ด๊ฐ€ ๊ฐ€๋Š” ์ด๋ณด๋‹ค ๋งŽ๋‹ค. ์œผ๋ฆฌ๋งŒํผ์–ด๋ฏธ ๋‚˜๋ฅผ ๋ฏธ์›Œํ•˜๋ฆฌ๋งŒํผ ๊ทธ์—๊ฒŒ ์ž˜๋ชปํ•œ ์ผ์ด ์—†๋‹ค. ์œผ ์ด๋งŒํผ์˜์กด ๋ช…์‚ฌ ์ฐฌ์„ฑํ•  ์ด๋„ ๋ฐ˜๋Œ€ํ•  ์ด๋งŒํผ์ด๋‚˜ ๋งŽ์„ ๊ฒƒ์ด๋‹ค. ์œผ๋Ÿฌ๋ชฉ์  ๊ณต๋ถ€ํ•˜๋Ÿฌ ๊ฐ„๋‹ค. ์œผ๋ ค์˜๋„ ์„œ์šธ ๊ฐ€๋ ค ํ•œ๋‹ค. ์œผ๋กœ์„œ์ž๊ฒฉ ์‚ฌ๋žŒ์œผ๋กœ์„œ ๊ทธ๋Ÿด ์ˆ˜๋Š” ์—†๋‹ค. ์œผ๋กœ์จ์ˆ˜๋‹จ ๋‹ญ์œผ๋กœ์จ ๊ฟฉ์„ ๋Œ€์‹ ํ–ˆ๋‹ค. ์œผ๋ฏ€๋กœ์–ด๋ฏธ ๊ทธ๊ฐ€ ๋‚˜๋ฅผ ๋ฏฟ์œผ๋ฏ€๋กœ ๋‚˜๋„ ๊ทธ๋ฅผ ๋ฏฟ๋Š”๋‹ค. , ์Œ์œผ๋กœ์จ์กฐ์‚ฌ ๊ทธ๋Š” ๋ฏฟ์Œ์œผ๋กœ์จ ์‚ฐ ๋ณด๋žŒ์„ ๋А๊ผˆ๋‹ค. <ํ•œ๊ธ€ ๋งž์ถค๋ฒ•, ํ‘œ์ค€์–ด ๊ทœ์ • - ํ•œ๊ธ€ ๋งž์ถค๋ฒ• ์ œ36ํ•ญ> ๋’ค์— ์–ด๊ฐ€ ์™€์„œ ๋กœ ์ค„ ์ ์—๋Š” ์ค€ ๋Œ€๋กœ ์ ๋Š”๋‹ค. ๋ณธ๋ง ๊ฐ€์ง€์–ด, ๊ฒฌ๋””์–ด, ๋‹ค๋‹ˆ์–ด, ๋ง‰ํžˆ์–ด, ๋ฒ„ํ‹ฐ์–ด, ์น˜์ด์–ด, ๊ฐ€์ง€์—ˆ๋‹ค, ๊ฒฌ๋””์—ˆ๋‹ค, ๋‹ค๋‹ˆ์—ˆ๋‹ค, ๋ง‰ํžˆ์—ˆ๋‹ค, ๋ฒ„ํ‹ฐ์—ˆ๋‹ค, ์น˜์ด์—ˆ๋‹ค ์ค€๋ง ๊ฐ€์ ธ, ๊ฒฌ๋ŽŒ, ๋‹ค๋…€, ๋ง‰ํ˜€, ๋ฒ„ํ…จ, ์น˜์—ฌ, ๊ฐ€์กŒ๋‹ค, ๊ฒฌ๋Ž ๋‹ค, ๋‹ค๋…”๋‹ค, ๋ง‰ํ˜”๋‹ค, ๋ฒ„ํ…ผ๋‹ค, ์น˜์˜€ ๋‹ค"
}

# Build the user prompt
user_prompt = make_chat(sample_data)

# System prompt
system_prompt = (
    "You are a helpful AI assistant. Please answer the user's questions kindly. "
    "๋‹น์‹ ์€ ํ•œ๊ตญ์˜ ์ „ํ†ต ๋ฌธํ™”์™€ ์—ญ์‚ฌ, ๋ฌธ๋ฒ•, ์‚ฌํšŒ, ๊ณผํ•™๊ธฐ์ˆ  ๋“ฑ ๋‹ค์–‘ํ•œ ๋ถ„์•ผ์— ๋Œ€ํ•ด ์ž˜ ์•Œ๊ณ  ์žˆ๋Š” ์œ ๋Šฅํ•œ AI ์–ด์‹œ์Šคํ„ดํŠธ ์ž…๋‹ˆ๋‹ค. "
    "์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ๋Œ€ํ•ด ์นœ์ ˆํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”. ๋‹จ, ๋™์ผํ•œ ๋ฌธ์žฅ์„ ์ ˆ๋Œ€ ๋ฐ˜๋ณตํ•˜์ง€ ๋งˆ์‹œ์˜ค."
)

message = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

# ํ† ํฌ๋‚˜์ด์ฆˆ
inputs = tokenizer.apply_chat_template(
    message,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

attention_mask = (inputs != tokenizer.pad_token_id).long().to(model.device)

# ์ƒ์„ฑ
outputs = model.generate(
    inputs,
    max_new_tokens=2048,
    do_sample=False,
    attention_mask=attention_mask,
)

# Extract the newly generated tokens as the answer
answer = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)

# Post-processing: strip a leading "๋‹ต๋ณ€:" prefix and drop anything after "#"
if answer.startswith("๋‹ต๋ณ€: "):
    answer = answer[4:]
elif answer.startswith("๋‹ต๋ณ€:"):
    answer = answer[3:]

if "#" in answer:
    answer = answer.split("#")[0].strip()

print(answer)
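
If unsloth is unavailable, the adapter can also be loaded with plain transformers and peft. This is a minimal alternative sketch, assuming only those two libraries:

# A minimal sketch, assuming plain transformers + peft (no unsloth).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "kakaocorp/kanana-1.5-8b-instruct-2505",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("kakaocorp/kanana-1.5-8b-instruct-2505")
model = PeftModel.from_pretrained(base, "demoap3909/kanana-1.5-8b-instruct-2505-lora-kli-sft-25")
model.eval()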