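The snippet below scores a user prompt with jungseob/qwen3-4b-instruct-clarity served through vLLM: it applies the model's chat template, generates greedily, and prints the returned JSON object of rubric scores.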

import json
import torch
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# ์‚ฌ์šฉ์ž ์งˆ๋ฌธ ์˜ˆ์‹œ
user_content = "Please write a short story about a cat who becomes a detective. The story should be humorous and no more than 500 words. Set it in a bustling city."

messages = [
    {"role": "user", "content": user_content}
]


# ๋ชจ๋ธ๊ณผ ํ† ํฌ๋‚˜์ด์ € ๊ฒฝ๋กœ ์„ค์ •
model_name = "jungseob/qwen3-4b-instruct-clarity"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with vLLM, sharding across all available GPUs
num_gpus = torch.cuda.device_count()
llm = LLM(
    model=model_name,
    tensor_parallel_size=num_gpus,
    trust_remote_code=True
)

# Apply the chat template to build the final prompt string
prompt_string = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

# Greedy decoding (temperature=0) keeps the rubric scores deterministic
sampling_params = SamplingParams(
    max_tokens=400,
    temperature=0,
    top_p=1.0 
)

outputs = llm.generate([prompt_string], sampling_params)
generated_text = outputs[0].outputs[0].text

# The model returns a JSON object of rubric scores, for example:
# {'ambiguity_terminology': 10, 'necessary_context': 10, 'scope_feasibility': 9, 'self_containment': 10, 'specificity_constraints': 8, 'task_definition': 9}
print(json.loads(generated_text))
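vLLM accepts a list of prompts, so several requests can be scored in one call. Below is a minimal batch sketch reusing the objects above; the two example prompts and the JSONDecodeError guard are illustrative additions, not part of the original card.

# Hypothetical prompts to score in one batch
prompts_to_score = [
    "Write a poem.",
    "Summarize the attached report in three bullet points for an executive audience.",
]

batch_prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": p}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for p in prompts_to_score
]

# vLLM returns one RequestOutput per prompt, in input order
for prompt, output in zip(prompts_to_score, llm.generate(batch_prompts, sampling_params)):
    try:
        scores = json.loads(output.outputs[0].text)
    except json.JSONDecodeError:
        scores = None  # guard against occasional non-JSON output
    print(prompt[:40], "->", scores)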
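If vLLM is not available, the same score can be produced with plain Transformers generation. This is a sketch, not the card's documented path; it assumes a machine with accelerate installed and reuses model_name, tokenizer, and prompt_string from above.

from transformers import AutoModelForCausalLM

hf_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # matches the BF16 checkpoint
    device_map="auto",           # requires accelerate
)

inputs = tokenizer(prompt_string, return_tensors="pt").to(hf_model.device)
with torch.no_grad():
    generated = hf_model.generate(**inputs, max_new_tokens=400, do_sample=False)

# Decode only the newly generated tokens, then parse the JSON scores
new_tokens = generated[0][inputs["input_ids"].shape[1]:]
print(json.loads(tokenizer.decode(new_tokens, skip_special_tokens=True)))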
Model size: 4B params · Tensor type: BF16 · Format: Safetensors
ยท
Inference Providers NEW
This model isn't deployed by any Inference Provider. ๐Ÿ™‹ Ask for provider support