Uploaded model
- Developed by: junnei
- License: apache-2.0
- Finetuned from model: unsloth/qwen3-14b-unsloth-bnb-4bit
This qwen3 model was trained 2x faster with Unsloth and Hugging Face's TRL library.
Model load
# --- Model load: 4-bit Qwen3-14B base via Unsloth, then apply the LoRA adapter ---
from unsloth import FastLanguageModel
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "unsloth/Qwen3-14B",
max_seq_length = 2048, # Context length - can be longer, but uses more memory
load_in_4bit = True, # 4bit uses much less memory
)
# 1. Load the PEFT config (LoRA metadata)
# NOTE(review): `config` is loaded but never used below — confirm it is needed.
peft_model_id = "junnei/qwen3-14b-tft"
config = PeftConfig.from_pretrained(peft_model_id)
# 3. Apply the LoRA weights
model = PeftModel.from_pretrained(model, peft_model_id)
# 4. Load the tokenizer (from the adapter repo; executes repo code via trust_remote_code)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id, trust_remote_code=True)
# (Optional) switch the model to evaluation mode
model.eval()
Data prep
import json
from datasets import load_dataset, Dataset
from tqdm import tqdm
# Prompt template for the TFT win-prediction task: it is filled with each
# player's traits and slot-indexed units, then asks the model to predict
# whether 'player' or 'opponent' wins.
# NOTE(review): the Korean text below is mojibake (encoding-corrupted). It is
# kept byte-identical here because it is a runtime string the model was
# presumably trained on — restore the original Korean from the source repo
# before retraining or editing.
prompt_template = """λ€μμ TFT κ²μμ 보λ μνμ
λλ€. μλλ κ° νλ μ΄μ΄μ 보λκ° κ°μ§ νΉμ±λ€κ³Ό μ λ ꡬμ±μ
λλ€. μ λμ μμΉλ μ¬λ‘―λ²νΈλ‘ ννλ©λλ€.
[Player]
- Traits: {player_traits}
- Units:
{player_units}
[Opponent]
- Traits: {opponent_traits}
- Units:
{opponent_units}
μ΄ λ³΄λλ₯Ό λ°νμΌλ‘ 'player'κ° μΉλ¦¬ν μ§ 'opponent'κ° μΉλ¦¬ν μ§ μμΈ‘ν΄μ£ΌμΈμ."""
def trait_to_str(traits):
    """Render trait dicts as a comma-separated 'Grade Count Trait' string."""
    parts = []
    for trait in traits:
        parts.append(f"{trait['Grade']} {trait['Count']} {trait['Trait']}")
    return ', '.join(parts)
def units_to_str(units):
    """Render a slot->unit mapping as ' - Slot N: ...' lines in numeric slot order.

    Empty slots (falsy value or missing 'Unit') are skipped; returns
    ' (no units)' when nothing remains.
    """
    rendered = []
    for slot in sorted(units, key=int):
        unit = units[slot]
        if not unit:
            continue
        name = unit.get("Unit")
        if not name:
            continue
        # Normalize 'Tier 2' / 'tier 2' / '2' down to the bare tier value.
        raw_tier = unit.get("Tier", "").replace("Tier ", "").replace("tier ", "")
        tier_label = f"Tier {raw_tier}" if raw_tier else ""
        item_list = unit.get("Items", [])
        suffix = f" with {', '.join(item_list)}" if item_list else ""
        rendered.append(f" - Slot {slot}: {name} ({tier_label}){suffix}")
    return '\n'.join(rendered) if rendered else " (no units)"
def convert_example(example):
    """Convert one raw board record into a single-turn chat example.

    Returns {"conversations": [user_msg, assistant_msg]} where the assistant
    message is 'winner : player' or 'winner : opponent' per example['is_win'].
    """
    board = example["board"]
    fields = {}
    for side in ("player", "opponent"):
        info = board[side]
        fields[f"{side}_traits"] = trait_to_str(info["traits"])
        fields[f"{side}_units"] = units_to_str(info["units"])
    prompt = prompt_template.format(**fields)
    winner = "player" if example["is_win"] else "opponent"
    return {
        "conversations": [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": f"winner : {winner}"},
        ]
    }
# 5. Load the JSONL data and build a Hugging Face Dataset object from it
with open("data.jsonl", "r", encoding="utf-8") as f:
    raw_data = [json.loads(line) for line in f]
dataset = Dataset.from_list(raw_data)
# 6. Convert each record into the conversation format
conversation_dataset = dataset.map(convert_example)
print(conversation_dataset[0]['conversations'])
Validation
import torch
import torch.nn.functional as F
from tqdm import tqdm
def score_sequence(prefix_ids, candidate_ids, model):
    """Sum of log-probabilities of `candidate_ids` as a continuation of `prefix_ids`.

    Args:
        prefix_ids: List[int] — token ids of the prompt prefix.
        candidate_ids: List[int] — token ids of the candidate continuation.
        model: causal LM whose output has `.logits` of shape (batch, seq, vocab).

    Returns:
        float — total log-prob of the candidate sequence under the model.

    Improvements over the token-by-token version:
    - Single forward pass over prefix + candidate instead of one full forward
      per candidate token (O(L) instead of O(L^2) work; identical math, since
      causal attention makes the logits at each position independent of later
      tokens).
    - The device is taken from the model's parameters rather than hard-coded
      to "cuda", so CPU models also work.
    """
    device = next(model.parameters()).device
    full_ids = torch.tensor([list(prefix_ids) + list(candidate_ids)], device=device)
    with torch.no_grad():
        logits = model(input_ids=full_ids).logits[0]
    log_probs = F.log_softmax(logits, dim=-1)
    # logits at position i predict token i+1, so candidate token j (0-based)
    # at position len(prefix_ids)+j is scored by log_probs[len(prefix_ids)+j-1].
    start = len(prefix_ids)
    total_logprob = 0.0
    for j, token_id in enumerate(candidate_ids):
        total_logprob += log_probs[start + j - 1, token_id].item()
    return total_logprob
# ----------------------------------------------------------------------------
# Validation: for each example, pick the higher-scoring candidate (" player"
# vs " opponent") and check whether greedy generate() emits the same tokens.
# NOTE(review): 'Accuracy' below measures agreement between generate() and the
# scored choice, NOT correctness against the dataset's is_win label — confirm
# this is the intended metric.
correct = 0
total = 0
# Token ids of the two candidate answer sequences
player_ids = tokenizer.encode(" player", add_special_tokens=False)
opponent_ids = tokenizer.encode(" opponent", add_special_tokens=False)
max_cand_len = max(len(player_ids), len(opponent_ids))
for text in tqdm(conversation_dataset['conversations'][:1000]):
    # 1) Build the prompt: keep the chat-templated text up to just after 'winner :'
    full_prompt = tokenizer.apply_chat_template(text, tokenize=False)
    assistant_content = 'winner :'
    assistant_start = full_prompt.find(assistant_content) + len(assistant_content)
    prompt_text = full_prompt[:assistant_start]
    # 2) Tokenize the prefix
    inputs = tokenizer(prompt_text, return_tensors="pt").to("cuda")
    prefix_ids = inputs["input_ids"][0].tolist()
    # 3) Cumulative log-prob of each candidate sequence
    player_score = score_sequence(prefix_ids, player_ids, model)
    opponent_score = score_sequence(prefix_ids, opponent_ids, model)
    pred_ids = player_ids if player_score > opponent_score else opponent_ids
    pred_token = "player" if player_score > opponent_score else "opponent"
    # Softmax over the two sequence scores -> confidence of the chosen label
    log_probs = torch.tensor([player_score, opponent_score])
    probs = torch.softmax(log_probs, dim=0)
    pred_score = probs[0].item() if player_score > opponent_score else probs[1].item()
    # 4) Greedy generate() token sequence (up to the longest candidate length)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_cand_len,
        do_sample=False,
    )[0]
    gen_token_ids = output_ids[len(prefix_ids): len(prefix_ids) + len(pred_ids)]
    # 5) Match check: does generate() agree with the scored prediction?
    is_match = gen_token_ids.tolist() == pred_ids
    # Debug output (only when they disagree)
    if not is_match:
        decoded = tokenizer.decode(gen_token_ids, skip_special_tokens=True)
        print(f"[Mismatch] Score : {pred_score*100:.2f}%, gen_tokens={gen_token_ids.tolist()} ({decoded!r}), "
        f"pred_tokens={pred_ids} ({pred_token})")
    else:
        correct += 1
        # Flag low-confidence matches
        if pred_score < 0.9:
            print(f"[Match] Score : {pred_score*100:.2f}%")
    total += 1
print(f"\nAccuracy: {correct}/{total} = {correct/total:.2%}")
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support