aki-0421/clip-anime-patch400-10k-v1
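This is a CLIP model designed for anime character retrieval tasks. As the example below shows, it encodes images and text queries (Japanese in the example) into embeddings that can be compared with a similarity score.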
Example
import math
from PIL import Image
from sentence_transformers import SentenceTransformer
def resize_image_for_patch(image: Image.Image, patch_size: int = 14, max_patches: int = 400) -> Image.Image:
    """Resize *image* so it tiles into at most ``max_patches`` square patches.

    The aspect ratio is preserved as closely as the patch grid allows, and
    both output dimensions are positive multiples of ``patch_size``.

    Args:
        image: Source image; only its ``size`` and ``resize`` are used.
        patch_size: Edge length of one square patch, in pixels.
        max_patches: Upper bound on the number of patches in the result.

    Returns:
        The resized image, resampled with bicubic interpolation.

    Raises:
        ValueError: If ``patch_size`` or ``max_patches`` is less than 1.
    """
    if patch_size < 1 or max_patches < 1:
        raise ValueError("patch_size and max_patches must be positive integers")

    orig_width, orig_height = image.size
    aspect_ratio = orig_width / orig_height

    # Size the longer side first: with cols * rows <= max_patches and
    # cols / rows ~= aspect_ratio, the longer side gets
    # floor(sqrt(max_patches * ratio)) patches; the shorter side follows
    # from the aspect ratio.
    if aspect_ratio >= 1:
        # Landscape or square orientation
        target_width = patch_size * math.floor(math.sqrt(max_patches * aspect_ratio))
        target_height = int(target_width / aspect_ratio)
    else:
        # Portrait orientation
        target_height = patch_size * math.floor(math.sqrt(max_patches / aspect_ratio))
        target_width = int(target_height * aspect_ratio)

    # Snap both sides down to whole patches, but never below one patch:
    # for very elongated images the short side would otherwise round to 0
    # and the resize call would be given an empty dimension.
    cols = max(1, target_width // patch_size)
    rows = max(1, target_height // patch_size)

    # Clamping the short side up to one patch can push the grid over the
    # budget, so trim the longer side until it fits again.
    if cols * rows > max_patches:
        if cols >= rows:
            cols = max(1, max_patches // rows)
        else:
            rows = max(1, max_patches // cols)

    return image.resize((cols * patch_size, rows * patch_size), Image.BICUBIC)
# Init model
model = SentenceTransformer("aki-0421/clip-anime-patch400-10k-v1", device="cuda")

# Load the query image (replace the path with your own file). The context
# manager closes the underlying file once the resized copy has been made.
with Image.open("/home/aki0421/Share/images/00085.png") as source_image:
    images = [resize_image_for_patch(source_image)]
image_embeddings = model.encode(images, convert_to_tensor=True)

# Candidate text descriptions to score against the image.
sentences = [
    "女の子が悲しんでいる。",
    "落ち込んでる人",
    "泣いている",
    "笑っている",
    "ピンクの髪の女の子",
    "赤い髪の女の子",
    "茶色の髪の女の子",
    "赤い目",
    "青い目",
    "曇っている",
    "雨が降っている",
    "晴れている",
    "キッチンにいます。",
    "学校にいる",
    "魔法少女のようだ",
    "戦闘しますか?",
    "男性ですか?",
    "茶色い髪の女の子が悲しんでいるシーン",
    "ピンクの髪の女の子が笑っているシーン",
]
text_embeddings = model.encode(sentences, convert_to_tensor=True)

# Score every sentence against every image and show the result.
similarities = model.similarity(text_embeddings, image_embeddings)
print(similarities)
Citation
@misc{
qwen2.5-VL,
title = {Qwen2.5-VL},
url = {https://qwenlm.github.io/blog/qwen2.5-vl/},
author = {Qwen Team},
month = {January},
year = {2025}
}
@misc{
Ruri,
title={{Ruri: Japanese General Text Embeddings}},
author={Hayato Tsukagoshi and Ryohei Sasano},
year={2024},
eprint={2409.07737},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.07737},
}
@misc{
oshizo2024clipqwen,
author = {Oshizo},
title = {japanese-clip-qwen2\_vl},
year = {2024},
howpublished = {\url{https://github.com/oshizo/japanese-clip-qwen2_vl}},
note = {Accessed: 2025-06-08}
}
- Downloads last month
- 76
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support
Model tree for aki-0421/clip-anime-patch400-10k-v1
Base model
Qwen/Qwen2.5-VL-7B-Instruct