Uploaded model

  • Developed by: junnei
  • License: apache-2.0
  • Finetuned from model: unsloth/qwen3-14b-unsloth-bnb-4bit

This Qwen3 model was trained 2x faster with Unsloth and Hugging Face's TRL library.
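
The exact training script is not part of this card. The sketch below shows a minimal, assumed Unsloth + TRL SFT setup for reference only: the LoRA hyperparameters and SFTConfig values are illustrative, the to_text helper is hypothetical, and model, tokenizer, and conversation_dataset refer to the objects built in the Model load and Data prep sections below.

from unsloth import FastLanguageModel
from trl import SFTConfig, SFTTrainer

# Attach LoRA adapters to the 4-bit base model (values are illustrative, not the exact recipe)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
)

# Render the chat-format examples into plain text for supervised fine-tuning
def to_text(example):
    return {"text": tokenizer.apply_chat_template(example["conversations"], tokenize = False)}

train_dataset = conversation_dataset.map(to_text)  # conversation_dataset is built in "Data prep" below

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,               # newer TRL releases name this argument processing_class
    train_dataset = train_dataset,
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        logging_steps = 10,
        output_dir = "outputs",
    ),
)
trainer.train()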

Model load

from unsloth import FastLanguageModel
import torch
from transformers import AutoTokenizer
from peft import PeftModel, PeftConfig

# 1. Load the 4-bit base model with Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-14B",
    max_seq_length = 2048,   # Context length - can be longer, but uses more memory
    load_in_4bit = True,     # 4bit uses much less memory
)

# 2. Load the PEFT config (LoRA metadata)
peft_model_id = "junnei/qwen3-14b-tft"
config = PeftConfig.from_pretrained(peft_model_id)

# 3. Apply the LoRA weights
model = PeftModel.from_pretrained(model, peft_model_id)

# 4. Load the tokenizer as well
tokenizer = AutoTokenizer.from_pretrained(peft_model_id, trust_remote_code=True)

# (Optional) Switch the model to evaluation mode
model.eval()
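
With the adapter applied, the model can be queried using the same chat format as in training. A minimal sketch; FastLanguageModel.for_inference and the enable_thinking flag are optional and assumed here, and user_prompt stands for a board description built with the prompt_template from the Data prep section below.

FastLanguageModel.for_inference(model)  # optional: enable Unsloth's faster inference path

messages = [{"role": "user", "content": user_prompt}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    enable_thinking = False,   # assumption: skip Qwen3 thinking so the model answers directly
    return_tensors = "pt",
).to(model.device)

with torch.no_grad():
    output_ids = model.generate(input_ids, max_new_tokens = 8, do_sample = False)

print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens = True))
# Expected form: "winner : player" or "winner : opponent"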

Data prep

import json
from datasets import load_dataset, Dataset
from tqdm import tqdm

prompt_template = """The following is the board state of a TFT game. Below are the traits and unit composition of each player's board. Unit positions are given as slot numbers.

[Player]
- Traits: {player_traits}
- Units:
{player_units}

[Opponent]
- Traits: {opponent_traits}
- Units:
{opponent_units}

Based on this board, predict whether 'player' or 'opponent' will win."""

def trait_to_str(traits):
    return ', '.join([
        f"{t['Grade']} {t['Count']} {t['Trait']}"
        for t in traits
    ])

def units_to_str(units):
    lines = []
    for pos_str in sorted(units.keys(), key=lambda x: int(x)):
        u = units[pos_str]
        if not u or not u.get("Unit"):
            continue
        unit_name = u["Unit"]
        tier = u.get("Tier", "").replace("Tier ", "").replace("tier ", "")
        tier_star = f"Tier {tier}" if tier else ""
        items = u.get("Items", [])
        item_str = f" with {', '.join(items)}" if items else ""
        lines.append(f"  - Slot {pos_str}: {unit_name} ({tier_star}){item_str}")
    return '\n'.join(lines) if lines else "  (no units)"

def convert_example(example):
    player = example["board"]["player"]
    opponent = example["board"]["opponent"]

    player_traits = trait_to_str(player["traits"])
    opponent_traits = trait_to_str(opponent["traits"])

    player_units = units_to_str(player["units"])
    opponent_units = units_to_str(opponent["units"])

    prompt = prompt_template.format(
        player_traits=player_traits,
        player_units=player_units,
        opponent_traits=opponent_traits,
        opponent_units=opponent_units
    )

    label = "player" if example["is_win"] else "opponent"

    return {
        "conversations": [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": f"winner : {label}"}
        ]
    }

# 5. Load the data and convert it into a Hugging Face Dataset object
with open("data.jsonl", "r", encoding="utf-8") as f:
    raw_data = [json.loads(line) for line in f]
dataset = Dataset.from_list(raw_data)

# 6. Convert to a conversation-format dataset
conversation_dataset = dataset.map(convert_example)

print(conversation_dataset[0]['conversations'])
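
convert_example implies that each line of data.jsonl is a JSON object shaped roughly as below; the field names are taken from the code above, while the concrete values are only illustrative.

# One record of data.jsonl, as inferred from convert_example (illustrative values)
example_record = {
    "is_win": True,                 # True -> label "player", False -> label "opponent"
    "board": {
        "player": {
            "traits": [             # consumed by trait_to_str
                {"Grade": "Gold", "Count": 4, "Trait": "ExampleTrait"},
            ],
            "units": {              # keyed by slot number, consumed by units_to_str
                "0": {"Unit": "ExampleUnit", "Tier": "Tier 2", "Items": ["ExampleItem"]},
            },
        },
        "opponent": {"traits": [], "units": {}},
    },
}

print(convert_example(example_record)["conversations"][1]["content"])  # -> "winner : player"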

Validation

import torch
import torch.nn.functional as F
from tqdm import tqdm

def score_sequence(prefix_ids, candidate_ids, model):
    """
    prefix_ids: List[int], candidate_ids: List[int]
    → Feed the candidate tokens one at a time after the prefix and sum their log-probabilities.
    """
    input_ids = torch.tensor([prefix_ids], device="cuda")
    total_logprob = 0.0

    for token_id in candidate_ids:
        with torch.no_grad():
            logits = model(input_ids=input_ids).logits[0, -1]
            logprob = F.log_softmax(logits, dim=-1)[token_id].item()
            total_logprob += logprob

        # Append the token so the next forward pass sees it in the context
        input_ids = torch.cat([input_ids, torch.tensor([[token_id]], device="cuda")], dim=1)

    return total_logprob
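
score_sequence re-runs the growing prefix once per candidate token. The variant below is an equivalent single-pass sketch (using the same torch and F imports as above): it feeds prefix + candidate through the model once and reads each candidate token's log-prob from the position just before it.

def score_sequence_single_pass(prefix_ids, candidate_ids, model):
    # One forward pass over prefix + candidate; logits at position i predict token i + 1.
    input_ids = torch.tensor([prefix_ids + candidate_ids], device="cuda")
    with torch.no_grad():
        logits = model(input_ids=input_ids).logits[0]
    log_probs = F.log_softmax(logits, dim=-1)

    total_logprob = 0.0
    start = len(prefix_ids)
    for i, token_id in enumerate(candidate_ids):
        # The distribution over candidate token i lives at position start + i - 1.
        total_logprob += log_probs[start + i - 1, token_id].item()
    return total_logprob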

# ────────────────────────────────────────────────────────────────────────────
correct = 0
total = 0

# Candidate answer token IDs
player_ids   = tokenizer.encode(" player",   add_special_tokens=False)
opponent_ids = tokenizer.encode(" opponent", add_special_tokens=False)
max_cand_len = max(len(player_ids), len(opponent_ids))

for text in tqdm(conversation_dataset['conversations'][:1000]):
    # 1) Build the chat prompt and cut it right after 'winner :'
    full_prompt = tokenizer.apply_chat_template(text, tokenize=False)
    assistant_content = 'winner :'
    assistant_start = full_prompt.find(assistant_content) + len(assistant_content)
    prompt_text = full_prompt[:assistant_start]

    # 2) Tokenize the prefix
    inputs = tokenizer(prompt_text, return_tensors="pt").to("cuda")
    prefix_ids = inputs["input_ids"][0].tolist()

    # 3) Cumulative log-prob of the two candidate sequences
    player_score   = score_sequence(prefix_ids, player_ids,   model)
    opponent_score = score_sequence(prefix_ids, opponent_ids, model)
    pred_ids       = player_ids if player_score > opponent_score else opponent_ids
    pred_token     = "player"    if player_score > opponent_score else "opponent"
    
    log_probs = torch.tensor([player_score, opponent_score])
    probs = torch.softmax(log_probs, dim=0)
    
    pred_score     = probs[0].item() if player_score > opponent_score else probs[1].item()
    
    # 4) Greedy generate() output (up to the longest candidate length)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_cand_len,
        do_sample=False,
    )[0]
    gen_token_ids = output_ids[len(prefix_ids): len(prefix_ids) + len(pred_ids)]

    # 5) Check whether the generated tokens match the scored prediction
    is_match = gen_token_ids.tolist() == pred_ids
    # Debug output (only on mismatch)
    if not is_match:
        decoded = tokenizer.decode(gen_token_ids, skip_special_tokens=True)
        print(f"[Mismatch] Score : {pred_score*100:.2f}%, gen_tokens={gen_token_ids.tolist()} ({decoded!r}), "
              f"pred_tokens={pred_ids} ({pred_token})")
    else:
        correct += 1
        if pred_score < 0.9:
            print(f"[Match] Score : {pred_score*100:.2f}%")

    total += 1

print(f"\nAccuracy: {correct}/{total} = {correct/total:.2%}")