speed committed · Commit 60c58b8 · verified · 1 parent: 6225e0c

Update README.md

Files changed (1)
  1. README.md +22 -6
README.md CHANGED
@@ -105,12 +105,28 @@ Please refer to the [Gemma Terms of Use](https://ai.google.dev/gemma/terms), as
 
 Bibtex:
 ```
- @inproceedings{sugiura2025clip,
- author = {Sugiura, Issa and Kurita, Shuhei and Oda, Yusuke and Kawahara, Daisuke and Okazaki, Naoaki},
- month = mar,
- series = {The 31st Annual Meeting of the Association for Natural Language Processing (NLP2025)},
- title = {Development of Japanese CLIP Leveraging Translation by Open LLMs},
- year = {2025}
+ @inproceedings{sugiura-etal-2025-developing,
+ title = "Developing {J}apanese {CLIP} Models Leveraging an Open-weight {LLM} for Large-scale Dataset Translation",
+ author = "Sugiura, Issa and
+   Kurita, Shuhei and
+   Oda, Yusuke and
+   Kawahara, Daisuke and
+   Okazaki, Naoaki",
+ editor = "Ebrahimi, Abteen and
+   Haider, Samar and
+   Liu, Emmy and
+   Haider, Sammar and
+   Leonor Pacheco, Maria and
+   Wein, Shira",
+ booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)",
+ month = apr,
+ year = "2025",
+ address = "Albuquerque, USA",
+ publisher = "Association for Computational Linguistics",
+ url = "https://aclanthology.org/2025.naacl-srw.15/",
+ pages = "162--170",
+ ISBN = "979-8-89176-192-6",
+ abstract = "CLIP is a foundational model that bridges images and text, widely adopted as a key component in numerous vision-language models. However, the lack of large-scale open Japanese image-text pairs poses a significant barrier to the development of Japanese vision-language models. In this study, we constructed a Japanese image-text pair dataset with 1.5 billion examples using machine translation with open-weight LLMs and pre-trained Japanese CLIP models on the dataset. The performance of the pre-trained models was evaluated across seven benchmark datasets, achieving competitive average scores compared to models of similar size without the need for extensive data curation. However, the results also revealed relatively low performance on tasks specific to Japanese culture, highlighting the limitations of translation-based approaches in capturing cultural nuances. Our dataset, models, and code are publicly available."
 }
 
 ```