# Minimal vLLM inference script: load a fine-tuned Qwen3 model, format a chat
# prompt with the model's own chat template, generate deterministically, and
# print the parsed result.
import json
import torch
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
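
# Build a single-turn chat request; any user prompt can be swapped in here.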
user_content = "Please write a short story about a cat who becomes a detective. The story should be humorous and no more than 500 words. Set it in a bustling city."
messages = [
    {"role": "user", "content": user_content},
]
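
# The tokenizer carries the Qwen3 chat template used to format the prompt below.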
model_name = "jungseob/qwen3-4b-instruct-clarity"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Shard the model across every visible GPU. torch.cuda.device_count() returns 0
# on a CUDA-less machine, so clamp to 1 (tensor_parallel_size must be at least 1,
# and should evenly divide the model's attention-head count).
num_gpus = torch.cuda.device_count()
llm = LLM(
    model=model_name,
    tensor_parallel_size=max(num_gpus, 1),
    trust_remote_code=True,
)

# Render the messages into the model's prompt format (for Qwen3 this wraps each
# turn in <|im_start|>/<|im_end|> tokens) and append the assistant header so
# generation starts a fresh reply.
prompt_string = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Greedy decoding: temperature=0 makes the output deterministic, so top_p has no
# effect. 400 tokens is roughly a 300-word ceiling; raise max_tokens if the full
# 500-word story gets truncated.
sampling_params = SamplingParams(
    max_tokens=400,
    temperature=0,
    top_p=1.0,
)

# llm.generate takes a batch of prompt strings and returns one RequestOutput per
# prompt; each holds a list of completions, so index both levels for the text.
outputs = llm.generate([prompt_string], sampling_params)
generated_text = outputs[0].outputs[0].text

# The "clarity" fine-tune is assumed to return a JSON object; fall back to
# printing the raw text if the model emits plain prose instead.
try:
    print(json.loads(generated_text))
except json.JSONDecodeError:
    print(generated_text)